@@ -343,9 +343,7 @@ class Lark(Serialize): | |||||
rule.options.priority = None | rule.options.priority = None | ||||
# TODO Deprecate lexer_callbacks? | # TODO Deprecate lexer_callbacks? | ||||
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals) | |||||
if self.options.transformer | |||||
else {}) | |||||
lexer_callbacks = {} | |||||
lexer_callbacks.update(self.options.lexer_callbacks) | lexer_callbacks.update(self.options.lexer_callbacks) | ||||
self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) | self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) | ||||
@@ -375,8 +373,7 @@ class Lark(Serialize): | |||||
return TraditionalLexer(lexer_conf) | return TraditionalLexer(lexer_conf) | ||||
def _prepare_callbacks(self): | def _prepare_callbacks(self): | ||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||||
self._callbacks = None | |||||
self._callbacks = {} | |||||
# we don't need these callbacks if we aren't building a tree | # we don't need these callbacks if we aren't building a tree | ||||
if self.options.ambiguity != 'forest': | if self.options.ambiguity != 'forest': | ||||
self._parse_tree_builder = ParseTreeBuilder( | self._parse_tree_builder = ParseTreeBuilder( | ||||
@@ -386,9 +383,11 @@ class Lark(Serialize): | |||||
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', | self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', | ||||
self.options.maybe_placeholders | self.options.maybe_placeholders | ||||
) | ) | ||||
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) | |||||
self._callbacks.update(self._parse_tree_builder.create_callback(self.options.transformer)) | |||||
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals)) | |||||
def _build_parser(self): | def _build_parser(self): | ||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||||
self._prepare_callbacks() | self._prepare_callbacks() | ||||
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | ||||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | ||||
@@ -428,16 +427,21 @@ class Lark(Serialize): | |||||
self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | ||||
self.source_path = '<deserialized>' | self.source_path = '<deserialized>' | ||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||||
self.lexer_conf = self.parser_class.deserialize_lexer_conf( # We need the terminals list to for _prepare_callbacks | |||||
data['parser'], | |||||
memo, | |||||
self.options) | |||||
self.terminals = self.lexer_conf.terminals | |||||
self._terminals_dict = {t.name: t for t in self.terminals} | |||||
self._prepare_callbacks() | self._prepare_callbacks() | ||||
self.parser = self.parser_class.deserialize( | self.parser = self.parser_class.deserialize( | ||||
data['parser'], | data['parser'], | ||||
memo, | memo, | ||||
self.lexer_conf, | |||||
self._callbacks, | self._callbacks, | ||||
self.options, # Not all, but multiple attributes are used | self.options, # Not all, but multiple attributes are used | ||||
) | ) | ||||
self.lexer_conf = self.parser.lexer_conf | |||||
self.terminals = self.parser.lexer_conf.terminals | |||||
self._terminals_dict = {t.name: t for t in self.terminals} | |||||
return self | return self | ||||
@classmethod | @classmethod | ||||
@@ -38,23 +38,26 @@ class MakeParsingFrontend: | |||||
parser_conf.parser_type = self.parser_type | parser_conf.parser_type = self.parser_type | ||||
lexer_conf.lexer_type = self.lexer_type | lexer_conf.lexer_type = self.lexer_type | ||||
return ParsingFrontend(lexer_conf, parser_conf, options) | return ParsingFrontend(lexer_conf, parser_conf, options) | ||||
@classmethod | @classmethod | ||||
def deserialize(cls, data, memo, callbacks, options): | |||||
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) | |||||
parser_conf = ParserConf.deserialize(data['parser_conf'], memo) | |||||
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) | |||||
parser_conf.callbacks = callbacks | |||||
def deserialize_lexer_conf(cls, data, memo, options): | |||||
# We need lexer_conf earley to have the terminals that we need to produce the callback list for paser_conf | |||||
# So we split deserialize into two methods | |||||
terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] | terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] | ||||
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo) | |||||
lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) | lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) | ||||
lexer_conf.re_module = regex if options.regex else re | lexer_conf.re_module = regex if options.regex else re | ||||
lexer_conf.use_bytes = options.use_bytes | lexer_conf.use_bytes = options.use_bytes | ||||
lexer_conf.g_regex_flags = options.g_regex_flags | lexer_conf.g_regex_flags = options.g_regex_flags | ||||
lexer_conf.skip_validation = True | lexer_conf.skip_validation = True | ||||
lexer_conf.postlex = options.postlex | lexer_conf.postlex = options.postlex | ||||
return lexer_conf | |||||
@classmethod | |||||
def deserialize(cls, data, memo, lexer_conf, callbacks, options): | |||||
parser_conf = ParserConf.deserialize(data['parser_conf'], memo) | |||||
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) | |||||
parser_conf.callbacks = callbacks | |||||
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) | return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) | ||||
@@ -129,7 +129,7 @@ class ParserState(object): | |||||
# shift once and return | # shift once and return | ||||
assert not is_end | assert not is_end | ||||
state_stack.append(arg) | state_stack.append(arg) | ||||
value_stack.append(token) | |||||
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) | |||||
return | return | ||||
else: | else: | ||||
# reduce+shift as many times as necessary | # reduce+shift as many times as necessary | ||||
@@ -10,7 +10,7 @@ from copy import copy, deepcopy | |||||
from lark.utils import Py36, isascii | from lark.utils import Py36, isascii | ||||
from lark import Token | |||||
from lark import Token, Transformer_NonRecursive | |||||
try: | try: | ||||
from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
@@ -34,7 +34,7 @@ from lark import logger | |||||
from lark.lark import Lark | from lark.lark import Lark | ||||
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters | from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters | ||||
from lark.tree import Tree | from lark.tree import Tree | ||||
from lark.visitors import Transformer, Transformer_InPlace, v_args | |||||
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive | |||||
from lark.grammar import Rule | from lark.grammar import Rule | ||||
from lark.lexer import TerminalDef, Lexer, TraditionalLexer | from lark.lexer import TerminalDef, Lexer, TraditionalLexer | ||||
from lark.indenter import Indenter | from lark.indenter import Indenter | ||||
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase): | |||||
r = p.parse("x") | r = p.parse("x") | ||||
self.assertEqual( r.children, ["X!"] ) | self.assertEqual( r.children, ["X!"] ) | ||||
def test_visit_tokens2(self): | |||||
g = """ | |||||
start: add+ | |||||
add: NUM "+" NUM | |||||
NUM: /\d+/ | |||||
%ignore " " | |||||
""" | |||||
text = "1+2 3+4" | |||||
expected = Tree('start', [3, 7]) | |||||
for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive): | |||||
class T(base): | |||||
def add(self, children): | |||||
return sum(children if isinstance(children, list) else children.children) | |||||
def NUM(self, token): | |||||
return int(token) | |||||
parser = Lark(g, parser='lalr', transformer=T()) | |||||
result = parser.parse(text) | |||||
self.assertEqual(result, expected) | |||||
def test_vargs_meta(self): | def test_vargs_meta(self): | ||||
@v_args(meta=True) | @v_args(meta=True) | ||||