@@ -19,13 +19,12 @@ parser = Lark(r"""
         start: _NL? section+
         section: "[" NAME "]" _NL item+
         item: NAME "=" VALUE _NL
-        NAME: /[a-zA-Z_]\w*/
-        VALUE: /.*/
-        _NL: /(\r?\n)+/
-
-        %ignore /[\t \f]+/
-        %ignore /\#[^\n]*/
+        VALUE: /./*
+
+        %import common.CNAME -> NAME
+        %import common.NEWLINE -> _NL
+        %import common.WS_INLINE
+        %ignore WS_INLINE

 """, parser="lalr", lexer="contextual")
@@ -12,25 +12,21 @@
 # See examples/conf.py for an example of that approach.
 #

-from lark import Lark, Transformer
+from lark import Lark

 parser = Lark(r"""
-        start: _nl? section+
-        section: "[" name "]" _nl item+
-        item: name "=" value _nl
-        name: /[a-zA-Z_]/ /\w/*
-        value: /./+
-        _nl: (_CR? _LF)+
-
-        _CR : /\r/
-        _LF : /\n/
+        start: _NL? section+
+        section: "[" NAME "]" _NL item+
+        item: NAME "=" VALUE _NL
+        VALUE: /./*
+
+        %import common.CNAME -> NAME
+        %import common.NEWLINE -> _NL
+        %import common.WS_INLINE
+        %ignore WS_INLINE
 """, lexer=None)

-class RestoreTokens(Transformer):
-    value = ''.join
-    name = ''.join
-
 def test():
     sample_conf = """
 [bla]
@@ -40,7 +36,7 @@ this="that",4
 """

     r = parser.parse(sample_conf)
-    print(RestoreTokens().transform(r).pretty())
+    print r.pretty()

 if __name__ == '__main__':
     test()
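Assembled from the + side of the two hunks above, the scanless version of the config example would read roughly as follows. This is a readability sketch, not part of the diff: the indentation, the trimmed boilerplate, and writing print as a function are assumptions.

from lark import Lark

parser = Lark(r"""
        start: _NL? section+
        section: "[" NAME "]" _NL item+
        item: NAME "=" VALUE _NL
        VALUE: /./*

        %import common.CNAME -> NAME
        %import common.NEWLINE -> _NL
        %import common.WS_INLINE
        %ignore WS_INLINE
""", lexer=None)

sample_conf = """
[bla]
this="that",4
"""

print(parser.parse(sample_conf).pretty())   # no manual RestoreTokens step needed anymore
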
@@ -39,3 +39,7 @@ CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
 WS_INLINE: (" "|/\t/)+
 WS: /[ \t\f\r\n]/+
+
+CR : /\r/
+LF : /\n/
+NEWLINE: (CR? LF)+
@@ -119,21 +119,23 @@ class Lark:
         assert not self.options.profile, "Feature temporarily disabled"
         self.profiler = Profiler() if self.options.profile else None

+        lexer = self.options.lexer
+        if lexer == 'auto':
+            if self.options.parser == 'lalr':
+                lexer = 'standard'
+            elif self.options.parser == 'earley':
+                lexer = 'standard'
+        self.options.lexer = lexer
+
         self.grammar = load_grammar(grammar)
-        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=True)
+        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer))
         self.ignore_tokens = self.grammar.extra['ignore']

         self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

-        if self.options.lexer == 'auto':
-            if self.options.parser == 'lalr':
-                self.options.lexer = 'standard'
-            elif self.options.parser == 'earley':
-                self.options.lexer = 'standard'
-
         if self.options.parser:
             self.parser = self._build_parser()
-        elif self.options.lexer:
+        elif lexer:
             self.lexer = self._build_lexer()

         if self.profiler: self.profiler.enter_section('outside_lark')
@@ -239,6 +239,15 @@ class ExtractAnonTokens(InlineTransformer):
         self.re_reverse = {td.pattern.value: td.name for td in tokens if isinstance(td.pattern, PatternRE)}
         self.i = 0

+    def range(self, start, end):
+        assert start.type == end.type == 'STRING'
+        start = start.value[1:-1]
+        end = end.value[1:-1]
+        assert len(start) == len(end) == 1
+        regexp = '/[%s-%s]/' % (start, end)
+        t = Token('REGEXP', regexp)
+        return self.tokenvalue(t)
+
     def tokenvalue(self, token):
         value = token.value[1:-1]
         if token.type == 'STRING':
@@ -325,8 +334,19 @@ class Grammar:
         self.extra = extra

     def compile(self, lexer=False):
-        assert lexer
+        # assert lexer
+        if not lexer:
+            self.rule_defs += self.token_defs
+            self.token_defs = []
+
+            for name, tree in self.rule_defs:
+                for tokenvalue in tree.find_data('tokenvalue'):
+                    value ,= tokenvalue.children
+                    if value.type == 'STRING':
+                        assert value[0] == value[-1] == '"'
+                        if len(value)>3:
+                            tokenvalue.data = 'expansion'
+                            tokenvalue.children = [T('tokenvalue', [Token('STRING', '"%s"'%ch)]) for ch in value[1:-1]]

         tokendefs = list(self.token_defs)

         # =================
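The new `if not lexer:` branch in Grammar.compile turns terminal definitions into rules and rewrites any multi-character string literal into an expansion of single-character tokenvalues, so the character-level (scanless) parser can match it one character at a time. A self-contained sketch of just that splitting step, using plain strings instead of lark's Tree/Token objects (the function name is made up for illustration):

def split_string_literal(value):
    # value is a quoted literal such as '"abc"' (the same shape the diff
    # checks with value[0] == value[-1] == '"').
    assert value[0] == value[-1] == '"'
    chars = value[1:-1]
    if len(chars) <= 1:
        return [value]                        # single characters stay as-is
    return ['"%s"' % ch for ch in chars]      # '"abc"' -> ['"a"', '"b"', '"c"']

assert split_string_literal('"abc"') == ['"a"', '"b"', '"c"']
assert split_string_literal('"a"') == ['"a"']
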
@@ -6,6 +6,7 @@ from .lexer import Lexer, ContextualLexer, Token
 from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, nearley
 from .parsers.grammar_analysis import Rule
+from .tree import Transformer

 class WithLexer:
     def __init__(self, lexer_conf):
@@ -121,10 +122,16 @@ class Nearley_NoLex:

 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf):
+        self.tokens_to_convert = {name: '__token_'+name for name, tree, _ in parser_conf.rules if is_terminal(name)}
+        rules = []
+        for name, exp, alias in parser_conf.rules:
+            name = self.tokens_to_convert.get(name, name)
+            exp = [self.tokens_to_convert.get(x, x) for x in exp]
+            rules.append((name, exp, alias))
+
         self.token_by_name = {t.name:t for t in lexer_conf.tokens}

-        rules = [(n, list(self._prepare_expansion(x)), a)
-                 for n,x,a in parser_conf.rules]
+        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in rules]

         self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
@@ -142,7 +149,16 @@ class Earley_NoLex:
     def parse(self, text):
         res = self.parser.parse(text)
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
-        return res[0]
+        res = res[0]
+
+        class RestoreTokens(Transformer):
+            pass
+
+        for t in self.tokens_to_convert:
+            setattr(RestoreTokens, t, ''.join)
+
+        res = RestoreTokens().transform(res)
+        return res

 def get_frontend(parser, lexer):
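The added code above undoes the character-level split at parse time: for every terminal that was converted into a rule, the frontend attaches ''.join as the transformer callback, so the matched characters are glued back into one string. A miniature stand-in showing why a bare ''.join works as a class attribute (hypothetical node name 'A'; the real Transformer from lark.tree walks an actual parse tree):

class RestoreTokens:
    # ''.join is a builtin bound method, so it is not re-bound to the
    # instance; it just receives the list of matched characters.
    A = ''.join

    def transform(self, name, children):
        # Stand-in for the real tree walk: dispatch on the node name.
        return getattr(self, name)(children)

assert RestoreTokens().transform('A', ['a', 'a', 'a']) == 'aaa'
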
@@ -39,9 +39,19 @@ class TestParsers(unittest.TestCase):
         l2 = g.parse('(a,b,c,*x)')
         assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

+    def test_earley_nolex(self):
+        g = Lark("""start: A "b" c
+                    A: "a"+
+                    c: "abc"
+                    """, parser="earley", lexer=None)
+        x = g.parse('aaaababc')
+
 class TestEarley(unittest.TestCase):
     pass

 def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
         return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
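For reference, the new scanless mode is invoked the same way the added test does it: pass lexer=None together with the earley parser. Written out as a standalone snippet (the printed tree shape is not asserted here, since it depends on the version):

from lark import Lark

g = Lark("""start: A "b" c
            A: "a"+
            c: "abc"
            """, parser="earley", lexer=None)

tree = g.parse('aaaababc')
print(tree.pretty())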