@@ -185,7 +185,7 @@ class LarkOptions(Serialize): | |||||
# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone. | # Options that can be passed to the Lark parser, even when it was loaded from cache/standalone. | ||||
# These option are only used outside of `load_grammar`. | # These option are only used outside of `load_grammar`. | ||||
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'} | |||||
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'} | |||||
_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | ||||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | ||||
@@ -343,12 +343,10 @@ class Lark(Serialize): | |||||
rule.options.priority = None | rule.options.priority = None | ||||
# TODO Deprecate lexer_callbacks? | # TODO Deprecate lexer_callbacks? | ||||
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals) | |||||
if self.options.transformer | |||||
else {}) | |||||
lexer_callbacks.update(self.options.lexer_callbacks) | |||||
self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) | |||||
self.lexer_conf = LexerConf( | |||||
self.terminals, re_module, self.ignore_tokens, self.options.postlex, | |||||
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes | |||||
) | |||||
if self.options.parser: | if self.options.parser: | ||||
self.parser = self._build_parser() | self.parser = self._build_parser() | ||||
@@ -375,8 +373,7 @@ class Lark(Serialize): | |||||
return TraditionalLexer(lexer_conf) | return TraditionalLexer(lexer_conf) | ||||
def _prepare_callbacks(self): | def _prepare_callbacks(self): | ||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||||
self._callbacks = None | |||||
self._callbacks = {} | |||||
# we don't need these callbacks if we aren't building a tree | # we don't need these callbacks if we aren't building a tree | ||||
if self.options.ambiguity != 'forest': | if self.options.ambiguity != 'forest': | ||||
self._parse_tree_builder = ParseTreeBuilder( | self._parse_tree_builder = ParseTreeBuilder( | ||||
@@ -387,11 +384,13 @@ class Lark(Serialize): | |||||
self.options.maybe_placeholders | self.options.maybe_placeholders | ||||
) | ) | ||||
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | ||||
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) | |||||
def _build_parser(self):
    """Build and return the parsing frontend for this Lark instance.

    Prepares tree-building callbacks first, then resolves the frontend
    class from the configured parser/lexer options and instantiates it
    with the lexer and parser configurations.
    """
    self._prepare_callbacks()
    frontend_cls = get_frontend(self.options.parser, self.options.lexer)
    conf = ParserConf(self.rules, self._callbacks, self.options.start)
    return frontend_cls(self.lexer_conf, conf, options=self.options)
def save(self, f): | def save(self, f): | ||||
"""Saves the instance into the given file object | """Saves the instance into the given file object | ||||
@@ -410,6 +409,16 @@ class Lark(Serialize): | |||||
inst = cls.__new__(cls) | inst = cls.__new__(cls) | ||||
return inst._load(f) | return inst._load(f) | ||||
def _deserialize_lexer_conf(self, data, memo, options):
    """Rebuild a LexerConf from serialized `data`, then override the
    attributes that are allowed to differ at load time (callbacks,
    regex engine, flags, postlex, bytes mode).

    NOTE(review): `skip_validation` is forced on — presumably because a
    serialized grammar was already validated when it was saved; confirm.
    """
    conf = LexerConf.deserialize(data['lexer_conf'], memo)
    # Load-time options take precedence over whatever was serialized.
    conf.postlex = options.postlex
    conf.use_bytes = options.use_bytes
    conf.g_regex_flags = options.g_regex_flags
    conf.callbacks = options.lexer_callbacks or {}
    conf.re_module = regex if options.regex else re
    conf.skip_validation = True
    return conf
def _load(self, f, **kwargs): | def _load(self, f, **kwargs): | ||||
if isinstance(f, dict): | if isinstance(f, dict): | ||||
d = f | d = f | ||||
@@ -428,16 +437,18 @@ class Lark(Serialize): | |||||
self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | ||||
self.source_path = '<deserialized>' | self.source_path = '<deserialized>' | ||||
parser_class = get_frontend(self.options.parser, self.options.lexer) | |||||
self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options) | |||||
self.terminals = self.lexer_conf.terminals | |||||
self._prepare_callbacks() | self._prepare_callbacks() | ||||
self.parser = self.parser_class.deserialize( | |||||
self._terminals_dict = {t.name: t for t in self.terminals} | |||||
self.parser = parser_class.deserialize( | |||||
data['parser'], | data['parser'], | ||||
memo, | memo, | ||||
self.lexer_conf, | |||||
self._callbacks, | self._callbacks, | ||||
self.options, # Not all, but multiple attributes are used | self.options, # Not all, but multiple attributes are used | ||||
) | ) | ||||
self.lexer_conf = self.parser.lexer_conf | |||||
self.terminals = self.parser.lexer_conf.terminals | |||||
self._terminals_dict = {t.name: t for t in self.terminals} | |||||
return self | return self | ||||
@classmethod | @classmethod | ||||
@@ -40,21 +40,10 @@ class MakeParsingFrontend: | |||||
return ParsingFrontend(lexer_conf, parser_conf, options) | return ParsingFrontend(lexer_conf, parser_conf, options) | ||||
@classmethod
def deserialize(cls, data, memo, lexer_conf, callbacks, options):
    """Recreate a ParsingFrontend from serialized `data`.

    The lexer configuration is supplied by the caller (already
    deserialized and patched with load-time options) instead of being
    rebuilt here; only the parser configuration and the LALR tables are
    restored from `data`.
    """
    parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
    lalr = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
    # Re-attach the live callbacks; they are not part of the serialized form.
    parser_conf.callbacks = callbacks
    return ParsingFrontend(lexer_conf, parser_conf, options, parser=lalr)
@@ -129,7 +129,7 @@ class ParserState(object): | |||||
# shift once and return | # shift once and return | ||||
assert not is_end | assert not is_end | ||||
state_stack.append(arg) | state_stack.append(arg) | ||||
value_stack.append(token) | |||||
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) | |||||
return | return | ||||
else: | else: | ||||
# reduce+shift as many times as necessary | # reduce+shift as many times as necessary | ||||
@@ -1,9 +1,8 @@ | |||||
from __future__ import absolute_import | from __future__ import absolute_import | ||||
import sys | |||||
from unittest import TestCase, main | from unittest import TestCase, main | ||||
from lark import Lark, Tree | |||||
from lark import Lark, Tree, Transformer | |||||
from lark.lexer import Lexer, Token | from lark.lexer import Lexer, Token | ||||
import lark.lark as lark_module | import lark.lark as lark_module | ||||
@@ -12,8 +11,6 @@ try: | |||||
except ImportError: | except ImportError: | ||||
from io import BytesIO as StringIO | from io import BytesIO as StringIO | ||||
import tempfile, os | |||||
class MockFile(StringIO): | class MockFile(StringIO): | ||||
def close(self): | def close(self): | ||||
@@ -48,6 +45,18 @@ class CustomLexer(Lexer): | |||||
yield Token('A', obj) | yield Token('A', obj) | ||||
class TestT(Transformer):
    """Transformer used by the cache tests: sums the children of each
    `add` node and converts NUM tokens to int."""

    def add(self, children):
        # `children` may arrive as a plain list or as a Tree-like object
        # exposing `.children`, depending on the transformer pipeline.
        items = children if isinstance(children, list) else children.children
        return sum(items)

    def NUM(self, token):
        return int(token)
def append_zero(t):
    """Lexer callback: return a copy of token `t` with '0' appended to its value."""
    new_value = t.value + '0'
    return t.update(value=new_value)
class TestCache(TestCase): | class TestCache(TestCase): | ||||
def setUp(self): | def setUp(self): | ||||
pass | pass | ||||
@@ -73,7 +82,7 @@ class TestCache(TestCase): | |||||
parser = Lark(g, parser='lalr', cache=True) | parser = Lark(g, parser='lalr', cache=True) | ||||
assert parser.parse('a') == Tree('start', []) | assert parser.parse('a') == Tree('start', []) | ||||
parser = Lark(g+' "b"', parser='lalr', cache=True) | |||||
parser = Lark(g + ' "b"', parser='lalr', cache=True) | |||||
assert len(mock_fs.files) == 2 | assert len(mock_fs.files) == 2 | ||||
assert parser.parse('ab') == Tree('start', []) | assert parser.parse('ab') == Tree('start', []) | ||||
@@ -92,10 +101,29 @@ class TestCache(TestCase): | |||||
Lark(g, parser="lalr", debug=True, cache=True) | Lark(g, parser="lalr", debug=True, cache=True) | ||||
parser = Lark(g, parser="lalr", debug=True, cache=True) | parser = Lark(g, parser="lalr", debug=True, cache=True) | ||||
assert parser.options.options['debug'] | assert parser.options.options['debug'] | ||||
# Test inline transformer (tree-less) & lexer_callbacks | |||||
mock_fs.files = {} | |||||
g = """ | |||||
start: add+ | |||||
add: NUM "+" NUM | |||||
NUM: /\d+/ | |||||
%ignore " " | |||||
""" | |||||
text = "1+2 3+4" | |||||
expected = Tree('start', [30, 70]) | |||||
parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) | |||||
res0 = parser.parse(text) | |||||
parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero}) | |||||
assert len(mock_fs.files) == 1 | |||||
res1 = parser.parse(text) | |||||
res2 = TestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text)) | |||||
assert res0 == res1 == res2 == expected | |||||
finally: | finally: | ||||
lark_module.FS = fs | lark_module.FS = fs | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
main() | main() |
@@ -10,7 +10,7 @@ from copy import copy, deepcopy | |||||
from lark.utils import Py36, isascii | from lark.utils import Py36, isascii | ||||
from lark import Token | |||||
from lark import Token, Transformer_NonRecursive | |||||
try: | try: | ||||
from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
@@ -34,7 +34,7 @@ from lark import logger | |||||
from lark.lark import Lark | from lark.lark import Lark | ||||
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters | from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters | ||||
from lark.tree import Tree | from lark.tree import Tree | ||||
from lark.visitors import Transformer, Transformer_InPlace, v_args | |||||
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive | |||||
from lark.grammar import Rule | from lark.grammar import Rule | ||||
from lark.lexer import TerminalDef, Lexer, TraditionalLexer | from lark.lexer import TerminalDef, Lexer, TraditionalLexer | ||||
from lark.indenter import Indenter | from lark.indenter import Indenter | ||||
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase): | |||||
r = p.parse("x") | r = p.parse("x") | ||||
self.assertEqual( r.children, ["X!"] ) | self.assertEqual( r.children, ["X!"] ) | ||||
def test_visit_tokens2(self):
    """Token-level callbacks (the NUM method) must fire for every
    Transformer base class when the transformer is applied inline
    during LALR parsing.

    Fix: the grammar contains `\\d`, which in a non-raw string literal
    is an invalid escape sequence (SyntaxWarning on modern CPython,
    slated to become an error). Using a raw string is byte-identical
    at runtime and silences the warning.
    """
    g = r"""
        start: add+
        add: NUM "+" NUM
        NUM: /\d+/
        %ignore " "
        """
    text = "1+2 3+4"
    expected = Tree('start', [3, 7])
    for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive):
        class T(base):
            def add(self, children):
                # Children may be a list or a Tree-like object.
                return sum(children if isinstance(children, list) else children.children)

            def NUM(self, token):
                return int(token)

        parser = Lark(g, parser='lalr', transformer=T())
        result = parser.parse(text)
        self.assertEqual(result, expected)
def test_vargs_meta(self): | def test_vargs_meta(self): | ||||
@v_args(meta=True) | @v_args(meta=True) | ||||