@@ -19,13 +19,12 @@ parser = Lark(r"""
start: _NL? section+
section: "[" NAME "]" _NL item+
item: NAME "=" VALUE _NL
NAME: /[a-zA-Z_]\w*/
VALUE: /.*/
VALUE: /./*
%import common.CNAME -> NAME
%import common.NEWLINE -> _NL
_NL: /(\r?\n)+/
%ignore /[\t \f]+/
%ignore /\#[^\n]*/
%import common.WS_INLINE
%ignore WS_INLINE
""", parser="lalr", lexer="contextual")
@@ -12,25 +12,21 @@
# See examples/conf.py for an example of that approach.
#
from lark import Lark, Transformer
from lark import Lark
parser = Lark(r"""
start: _nl? section+
section: "[" name "]" _nl item+
item: name "=" value _nl
name: /[a-zA-Z_]/ /\w/*
value: /./+
_nl: (_CR? _LF)+
_CR : /\r/
_LF : /\n/
start: _NL? section+
section: "[" NAME "]" _NL item+
item: NAME "=" VALUE _NL
VALUE: /./*
%import common.CNAME -> NAME
%import common.NEWLINE -> _NL
%import common.WS_INLINE
%ignore WS_INLINE
""", lexer=None)
class RestoreTokens(Transformer):
value = ''.join
name = ''.join
def test():
sample_conf = """
[bla]
@@ -40,7 +36,7 @@ this="that",4
"""
r = parser.parse(sample_conf)
print(RestoreTokens().transform(r).pretty())
print r.pretty()
if __name__ == '__main__':
test()
@@ -39,3 +39,7 @@ CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
WS_INLINE: (" "|/\t/)+
WS: /[ \t\f\r\n]/+
CR : /\r/
LF : /\n/
NEWLINE: (CR? LF)+
@@ -119,21 +119,23 @@ class Lark:
assert not self.options.profile, "Feature temporarily disabled"
self.profiler = Profiler() if self.options.profile else None
lexer = self.options.lexer
if lexer == 'auto':
if self.options.parser == 'lalr':
lexer = 'standard'
elif self.options.parser == 'earley':
lexer = 'standard'
self.options.lexer = lexer
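# 'auto' is resolved to a concrete lexer choice up front, so the scannerless
# setup (lexer=None) can compile the grammar without a token stage below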
self.grammar = load_grammar(grammar)
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=True)
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer))
self.ignore_tokens = self.grammar.extra['ignore']
self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)
if self.options.lexer == 'auto':
if self.options.parser == 'lalr':
self.options.lexer = 'standard'
elif self.options.parser == 'earley':
self.options.lexer = 'standard'
if self.options.parser:
self.parser = self._build_parser()
elif self.options.lexer:
elif lexer:
self.lexer = self._build_lexer()
if self.profiler: self.profiler.enter_section('outside_lark')
@@ -239,6 +239,15 @@ class ExtractAnonTokens(InlineTransformer):
self.re_reverse = {td.pattern.value: td.name for td in tokens if isinstance(td.pattern, PatternRE)}
self.i = 0
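# the range handler below turns a character range given as two quoted STRING
# tokens (e.g. "a" and "z") into the single regexp /[a-z]/, then reuses the
# ordinary tokenvalue handling for it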
def range(self, start, end):
assert start.type == end.type == 'STRING'
start = start.value[1:-1]
end = end.value[1:-1]
assert len(start) == len(end) == 1
regexp = '/[%s-%s]/' % (start, end)
t = Token('REGEXP', regexp)
return self.tokenvalue(t)
def tokenvalue(self, token):
value = token.value[1:-1]
if token.type == 'STRING':
@@ -325,8 +334,19 @@ class Grammar:
self.extra = extra
def compile(self, lexer=False):
assert lexer
# assert lexer
if not lexer:
self.rule_defs += self.token_defs
self.token_defs = []
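# with no lexer, token definitions are folded into the rules; multi-character
# strings are then split below, so a STRING such as "abc" becomes an expansion
# of three one-character tokenvalues ("a" "b" "c") that the scannerless Earley
# parser can match character by character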
for name, tree in self.rule_defs:
for tokenvalue in tree.find_data('tokenvalue'):
value, = tokenvalue.children
if value.type == 'STRING':
assert value[0] == value[-1] == '"'
if len(value)>3:
tokenvalue.data = 'expansion'
tokenvalue.children = [T('tokenvalue', [Token('STRING', '"%s"'%ch)]) for ch in value[1:-1]]
tokendefs = list(self.token_defs)
# =================
@@ -6,6 +6,7 @@ from .lexer import Lexer, ContextualLexer, Token
from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, nearley
from .parsers.grammar_analysis import Rule
from .tree import Transformer
class WithLexer:
def __init__(self, lexer_conf):
@@ -121,10 +122,16 @@ class Nearley_NoLex:
class Earley_NoLex:
def __init__(self, lexer_conf, parser_conf):
self.tokens_to_convert = {name: '__token_'+name for name, tree, _ in parser_conf.rules if is_terminal(name)}
rules = []
for name, exp, alias in parser_conf.rules:
name = self.tokens_to_convert.get(name, name)
exp = [self.tokens_to_convert.get(x, x) for x in exp]
rules.append((name, exp, alias))
self.token_by_name = {t.name:t for t in lexer_conf.tokens}
rules = [(n, list(self._prepare_expansion(x)), a)
for n,x,a in parser_conf.rules]
rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in rules]
self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
@@ -142,7 +149,16 @@ class Earley_NoLex:
def parse(self, text):
res = self.parser.parse(text)
assert len(res) == 1, 'Ambiguous Parse! Not handled yet'
return res[0]
res = res[0]
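# rules standing in for converted tokens come back as lists of
# single-character matches; a throwaway Transformer joins each list back
# into a string, e.g. ['f', 'o', 'o'] -> 'foo'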
class RestoreTokens(Transformer):
pass
for t in self.tokens_to_convert:
setattr(RestoreTokens, t, ''.join)
res = RestoreTokens().transform(res)
return res
def get_frontend(parser, lexer):
@@ -39,9 +39,19 @@ class TestParsers(unittest.TestCase):
l2 = g.parse('(a,b,c,*x)')
assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())
def test_earley_nolex(self):
g = Lark("""start: A "b" c
A: "a"+
c: "abc"
""", parser="earley", lexer=None)
x = g.parse('aaaababc')
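# 'aaaababc' exercises both the explicit A terminal ("a"+) and the anonymous
# "b" / "abc" strings without a separate lexing stage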
class TestEarley(unittest.TestCase):
pass
def _make_parser_test(LEXER, PARSER):
def _Lark(grammar, **kwargs):
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)