瀏覽代碼

Merge branch 'fix-818' of https://github.com/MegaIng/lark into MegaIng-fix-818

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 年之前
父節點
當前提交
0332a4cae0
共有 4 個檔案被更改,包括 49 行新增20 行删除
  1. +13
    -9
      lark/lark.py
  2. +11
    -8
      lark/parser_frontends.py
  3. +1
    -1
      lark/parsers/lalr_parser.py
  4. +24
    -2
      tests/test_parser.py

+ 13
- 9
lark/lark.py 查看文件

@@ -343,9 +343,7 @@ class Lark(Serialize):
rule.options.priority = None

# TODO Deprecate lexer_callbacks?
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
if self.options.transformer
else {})
lexer_callbacks = {}
lexer_callbacks.update(self.options.lexer_callbacks)

self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
@@ -375,8 +373,7 @@ class Lark(Serialize):
return TraditionalLexer(lexer_conf)

def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._callbacks = None
self._callbacks = {}
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(
@@ -386,9 +383,11 @@ class Lark(Serialize):
self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks.update(self._parse_tree_builder.create_callback(self.options.transformer))
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))

def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._prepare_callbacks()
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
@@ -428,16 +427,21 @@ class Lark(Serialize):
self.options = LarkOptions.deserialize(options, memo)
self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
self.source_path = '<deserialized>'
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.lexer_conf = self.parser_class.deserialize_lexer_conf( # We need the terminals list for _prepare_callbacks
data['parser'],
memo,
self.options)
self.terminals = self.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
self._prepare_callbacks()
self.parser = self.parser_class.deserialize(
data['parser'],
memo,
self.lexer_conf,
self._callbacks,
self.options, # Not all, but multiple attributes are used
)
self.lexer_conf = self.parser.lexer_conf
self.terminals = self.parser.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
return self

@classmethod


+ 11
- 8
lark/parser_frontends.py 查看文件

@@ -38,23 +38,26 @@ class MakeParsingFrontend:
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)
@classmethod
def deserialize(cls, data, memo, callbacks, options):
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks

def deserialize_lexer_conf(cls, data, memo, options):
# We need lexer_conf early, so that we have the terminals required to produce the callback list for parser_conf
# So we split deserialize into two methods
terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex
return lexer_conf

@classmethod
def deserialize(cls, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)




+ 1
- 1
lark/parsers/lalr_parser.py 查看文件

@@ -129,7 +129,7 @@ class ParserState(object):
# shift once and return
assert not is_end
state_stack.append(arg)
value_stack.append(token)
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
return
else:
# reduce+shift as many times as necessary


+ 24
- 2
tests/test_parser.py 查看文件

@@ -10,7 +10,7 @@ from copy import copy, deepcopy

from lark.utils import Py36, isascii

from lark import Token
from lark import Token, Transformer_NonRecursive

try:
from cStringIO import StringIO as cStringIO
@@ -34,7 +34,7 @@ from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase):
r = p.parse("x")
self.assertEqual( r.children, ["X!"] )

def test_visit_tokens2(self):
g = """
start: add+
add: NUM "+" NUM
NUM: /\d+/
%ignore " "
"""
text = "1+2 3+4"
expected = Tree('start', [3, 7])
for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive):
class T(base):
def add(self, children):
return sum(children if isinstance(children, list) else children.children)
def NUM(self, token):
return int(token)
parser = Lark(g, parser='lalr', transformer=T())
result = parser.parse(text)
self.assertEqual(result, expected)

def test_vargs_meta(self):

@v_args(meta=True)


Loading…
取消
儲存