Browse Source

Merge branch 'fix-818' of https://github.com/MegaIng/lark into MegaIng-fix-818

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 years ago
parent
commit
0332a4cae0
4 changed files with 49 additions and 20 deletions
  1. +13
    -9
      lark/lark.py
  2. +11
    -8
      lark/parser_frontends.py
  3. +1
    -1
      lark/parsers/lalr_parser.py
  4. +24
    -2
      tests/test_parser.py

+ 13
- 9
lark/lark.py View File

@@ -343,9 +343,7 @@ class Lark(Serialize):
rule.options.priority = None rule.options.priority = None


# TODO Deprecate lexer_callbacks? # TODO Deprecate lexer_callbacks?
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
if self.options.transformer
else {})
lexer_callbacks = {}
lexer_callbacks.update(self.options.lexer_callbacks) lexer_callbacks.update(self.options.lexer_callbacks)


self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
@@ -375,8 +373,7 @@ class Lark(Serialize):
return TraditionalLexer(lexer_conf) return TraditionalLexer(lexer_conf)


def _prepare_callbacks(self): def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._callbacks = None
self._callbacks = {}
# we don't need these callbacks if we aren't building a tree # we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest': if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder( self._parse_tree_builder = ParseTreeBuilder(
@@ -386,9 +383,11 @@ class Lark(Serialize):
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit', self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
self.options.maybe_placeholders self.options.maybe_placeholders
) )
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks.update(self._parse_tree_builder.create_callback(self.options.transformer))
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))


def _build_parser(self): def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._prepare_callbacks() self._prepare_callbacks()
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
@@ -428,16 +427,21 @@ class Lark(Serialize):
self.options = LarkOptions.deserialize(options, memo) self.options = LarkOptions.deserialize(options, memo)
self.rules = [Rule.deserialize(r, memo) for r in data['rules']] self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
self.source_path = '<deserialized>' self.source_path = '<deserialized>'
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.lexer_conf = self.parser_class.deserialize_lexer_conf( # We need the terminals list for _prepare_callbacks
data['parser'],
memo,
self.options)
self.terminals = self.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
self._prepare_callbacks() self._prepare_callbacks()
self.parser = self.parser_class.deserialize( self.parser = self.parser_class.deserialize(
data['parser'], data['parser'],
memo, memo,
self.lexer_conf,
self._callbacks, self._callbacks,
self.options, # Not all, but multiple attributes are used self.options, # Not all, but multiple attributes are used
) )
self.lexer_conf = self.parser.lexer_conf
self.terminals = self.parser.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
return self return self


@classmethod @classmethod


+ 11
- 8
lark/parser_frontends.py View File

@@ -38,23 +38,26 @@ class MakeParsingFrontend:
parser_conf.parser_type = self.parser_type parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options) return ParsingFrontend(lexer_conf, parser_conf, options)
@classmethod @classmethod
def deserialize(cls, data, memo, callbacks, options):
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks

def deserialize_lexer_conf(cls, data, memo, options):
# We need lexer_conf early to have the terminals that we need to produce the callback list for parser_conf
# So we split deserialize into two methods
terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals) lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
lexer_conf.re_module = regex if options.regex else re lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex lexer_conf.postlex = options.postlex
return lexer_conf


@classmethod
def deserialize(cls, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)






+ 1
- 1
lark/parsers/lalr_parser.py View File

@@ -129,7 +129,7 @@ class ParserState(object):
# shift once and return # shift once and return
assert not is_end assert not is_end
state_stack.append(arg) state_stack.append(arg)
value_stack.append(token)
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
return return
else: else:
# reduce+shift as many times as necessary # reduce+shift as many times as necessary


+ 24
- 2
tests/test_parser.py View File

@@ -10,7 +10,7 @@ from copy import copy, deepcopy


from lark.utils import Py36, isascii from lark.utils import Py36, isascii


from lark import Token
from lark import Token, Transformer_NonRecursive


try: try:
from cStringIO import StringIO as cStringIO from cStringIO import StringIO as cStringIO
@@ -34,7 +34,7 @@ from lark import logger
from lark.lark import Lark from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive
from lark.grammar import Rule from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter from lark.indenter import Indenter
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase):
r = p.parse("x") r = p.parse("x")
self.assertEqual( r.children, ["X!"] ) self.assertEqual( r.children, ["X!"] )


def test_visit_tokens2(self):
    """Check that terminal callbacks of an embedded transformer are applied.

    For each of several transformer base classes, a transformer is passed to
    ``Lark(..., parser='lalr', transformer=...)``. Its ``NUM`` method converts
    the token to ``int`` and its ``add`` method sums the values it receives,
    so parsing ``"1+2 3+4"`` is expected to yield ``Tree('start', [3, 7])``.
    """
    # Raw string: the grammar contains the regexp /\d+/, and "\d" is not a
    # valid escape sequence inside a normal (non-raw) string literal.
    g = r"""
    start: add+
    add: NUM "+" NUM
    NUM: /\d+/
    %ignore " "
    """
    text = "1+2 3+4"
    expected = Tree('start', [3, 7])
    for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive):
        class T(base):
            def add(self, children):
                # Accept either a plain list or an object exposing
                # ``.children`` (presumably a Tree for some base classes).
                return sum(children if isinstance(children, list) else children.children)

            def NUM(self, token):
                return int(token)

        parser = Lark(g, parser='lalr', transformer=T())
        result = parser.parse(text)
        self.assertEqual(result, expected)

def test_vargs_meta(self): def test_vargs_meta(self):


@v_args(meta=True) @v_args(meta=True)


Loading…
Cancel
Save