Переглянути джерело

Merge branch 'MegaIng-fix-818'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 роки тому
джерело
коміт
4c7d1bdce8
5 змінених файлів з 85 додано та 35 видалено
  1. +25
    -14
      lark/lark.py
  2. +1
    -12
      lark/parser_frontends.py
  3. +1
    -1
      lark/parsers/lalr_parser.py
  4. +34
    -6
      tests/test_cache.py
  5. +24
    -2
      tests/test_parser.py

+ 25
- 14
lark/lark.py Переглянути файл

@@ -185,7 +185,7 @@ class LarkOptions(Serialize):

# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These options are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}

_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
@@ -343,12 +343,10 @@ class Lark(Serialize):
rule.options.priority = None

# TODO Deprecate lexer_callbacks?
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
if self.options.transformer
else {})
lexer_callbacks.update(self.options.lexer_callbacks)

self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
self.lexer_conf = LexerConf(
self.terminals, re_module, self.ignore_tokens, self.options.postlex,
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
)

if self.options.parser:
self.parser = self._build_parser()
@@ -375,8 +373,7 @@ class Lark(Serialize):
return TraditionalLexer(lexer_conf)

def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._callbacks = None
self._callbacks = {}
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(
@@ -387,11 +384,13 @@ class Lark(Serialize):
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))

def _build_parser(self):
self._prepare_callbacks()
parser_class = get_frontend(self.options.parser, self.options.lexer)
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
return parser_class(self.lexer_conf, parser_conf, options=self.options)

def save(self, f):
"""Saves the instance into the given file object
@@ -410,6 +409,16 @@ class Lark(Serialize):
inst = cls.__new__(cls)
return inst._load(f)

def _deserialize_lexer_conf(self, data, memo, options):
    """Rebuild the lexer configuration stored under ``data['lexer_conf']``.

    The configuration is restored with ``LexerConf.deserialize`` and the
    attributes below are then overwritten from ``options``.

    :param data: mapping that holds the serialized ``'lexer_conf'`` entry
    :param memo: memo object forwarded to ``LexerConf.deserialize``
    :param options: object providing ``lexer_callbacks``, ``regex``,
        ``use_bytes``, ``g_regex_flags`` and ``postlex``
    :return: the restored and updated lexer configuration
    """
    conf = LexerConf.deserialize(data['lexer_conf'], memo)

    # A falsy ``lexer_callbacks`` value is replaced by a fresh empty dict.
    callbacks = options.lexer_callbacks
    if not callbacks:
        callbacks = {}
    conf.callbacks = callbacks

    # Pick the regex engine according to the ``regex`` option.
    if options.regex:
        conf.re_module = regex
    else:
        conf.re_module = re

    conf.use_bytes = options.use_bytes
    conf.g_regex_flags = options.g_regex_flags
    conf.skip_validation = True
    conf.postlex = options.postlex
    return conf

def _load(self, f, **kwargs):
if isinstance(f, dict):
d = f
@@ -428,16 +437,18 @@ class Lark(Serialize):
self.options = LarkOptions.deserialize(options, memo)
self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
self.source_path = '<deserialized>'
parser_class = get_frontend(self.options.parser, self.options.lexer)
self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
self.terminals = self.lexer_conf.terminals
self._prepare_callbacks()
self.parser = self.parser_class.deserialize(
self._terminals_dict = {t.name: t for t in self.terminals}
self.parser = parser_class.deserialize(
data['parser'],
memo,
self.lexer_conf,
self._callbacks,
self.options, # Not all, but multiple attributes are used
)
self.lexer_conf = self.parser.lexer_conf
self.terminals = self.parser.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
return self

@classmethod


+ 1
- 12
lark/parser_frontends.py Переглянути файл

@@ -40,21 +40,10 @@ class MakeParsingFrontend:
return ParsingFrontend(lexer_conf, parser_conf, options)

@classmethod
def deserialize(cls, data, memo, callbacks, options):
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
def deserialize(cls, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks

terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]

lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex

return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)




+ 1
- 1
lark/parsers/lalr_parser.py Переглянути файл

@@ -129,7 +129,7 @@ class ParserState(object):
# shift once and return
assert not is_end
state_stack.append(arg)
value_stack.append(token)
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
return
else:
# reduce+shift as many times as necessary


+ 34
- 6
tests/test_cache.py Переглянути файл

@@ -1,9 +1,8 @@
from __future__ import absolute_import

import sys
from unittest import TestCase, main

from lark import Lark, Tree
from lark import Lark, Tree, Transformer
from lark.lexer import Lexer, Token
import lark.lark as lark_module

@@ -12,8 +11,6 @@ try:
except ImportError:
from io import BytesIO as StringIO

import tempfile, os


class MockFile(StringIO):
def close(self):
@@ -48,6 +45,18 @@ class CustomLexer(Lexer):
yield Token('A', obj)


class TestT(Transformer):
    """Transformer for the cache tests: sums ``add`` nodes and converts ``NUM`` to int."""

    def add(self, children):
        # ``children`` is either a plain list or an object exposing ``.children``.
        if isinstance(children, list):
            operands = children
        else:
            operands = children.children
        return sum(operands)

    def NUM(self, token):
        # Convert the matched token to an integer.
        return int(token)


def append_zero(t):
    """Return ``t.update(...)`` called with the value of ``t`` plus a trailing ``'0'``."""
    padded_value = t.value + '0'
    return t.update(value=padded_value)


class TestCache(TestCase):
def setUp(self):
pass
@@ -73,7 +82,7 @@ class TestCache(TestCase):
parser = Lark(g, parser='lalr', cache=True)
assert parser.parse('a') == Tree('start', [])

parser = Lark(g+' "b"', parser='lalr', cache=True)
parser = Lark(g + ' "b"', parser='lalr', cache=True)
assert len(mock_fs.files) == 2
assert parser.parse('ab') == Tree('start', [])

@@ -92,10 +101,29 @@ class TestCache(TestCase):
Lark(g, parser="lalr", debug=True, cache=True)
parser = Lark(g, parser="lalr", debug=True, cache=True)
assert parser.options.options['debug']

# Test inline transformer (tree-less) & lexer_callbacks
mock_fs.files = {}
g = """
start: add+
add: NUM "+" NUM
NUM: /\d+/
%ignore " "
"""
text = "1+2 3+4"
expected = Tree('start', [30, 70])

parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero})
res0 = parser.parse(text)
parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero})
assert len(mock_fs.files) == 1
res1 = parser.parse(text)
res2 = TestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text))
assert res0 == res1 == res2 == expected

finally:
lark_module.FS = fs



if __name__ == '__main__':
main()

+ 24
- 2
tests/test_parser.py Переглянути файл

@@ -10,7 +10,7 @@ from copy import copy, deepcopy

from lark.utils import Py36, isascii

from lark import Token
from lark import Token, Transformer_NonRecursive

try:
from cStringIO import StringIO as cStringIO
@@ -34,7 +34,7 @@ from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase):
r = p.parse("x")
self.assertEqual( r.children, ["X!"] )

def test_visit_tokens2(self):
    """Parse with an inline transformer built on each transformer base class.

    For every base class in the loop, a subclass defining ``add`` and
    ``NUM`` is passed as ``transformer=`` to an LALR ``Lark`` parser, and
    parsing ``"1+2 3+4"`` is expected to yield ``Tree('start', [3, 7])``.
    """
    # Raw string literal: the grammar contains the regex escape \d, which is
    # an invalid escape sequence in a normal (non-raw) string literal.
    g = r"""
    start: add+
    add: NUM "+" NUM
    NUM: /\d+/
    %ignore " "
    """
    text = "1+2 3+4"
    expected = Tree('start', [3, 7])
    for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive):
        class T(base):
            def add(self, children):
                # ``children`` is either a list or an object exposing ``.children``.
                return sum(children if isinstance(children, list) else children.children)

            def NUM(self, token):
                return int(token)

        parser = Lark(g, parser='lalr', transformer=T())
        result = parser.parse(text)
        self.assertEqual(result, expected)

def test_vargs_meta(self):

@v_args(meta=True)


Завантаження…
Відмінити
Зберегти