Browse Source

Merge branch 'MegaIng-fix-818'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 years ago
parent
commit
4c7d1bdce8
5 changed files with 85 additions and 35 deletions
  1. +25
    -14
      lark/lark.py
  2. +1
    -12
      lark/parser_frontends.py
  3. +1
    -1
      lark/parsers/lalr_parser.py
  4. +34
    -6
      tests/test_cache.py
  5. +24
    -2
      tests/test_parser.py

+ 25
- 14
lark/lark.py View File

@@ -185,7 +185,7 @@ class LarkOptions(Serialize):


# Options that can be passed to the Lark parser, even when it was loaded from cache/standalone.
# These option are only used outside of `load_grammar`.
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}


_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
@@ -343,12 +343,10 @@ class Lark(Serialize):
rule.options.priority = None


# TODO Deprecate lexer_callbacks?
lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals)
if self.options.transformer
else {})
lexer_callbacks.update(self.options.lexer_callbacks)

self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes)
self.lexer_conf = LexerConf(
self.terminals, re_module, self.ignore_tokens, self.options.postlex,
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
)


if self.options.parser:
self.parser = self._build_parser()
@@ -375,8 +373,7 @@ class Lark(Serialize):
return TraditionalLexer(lexer_conf)


def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._callbacks = None
self._callbacks = {}
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(
@@ -387,11 +384,13 @@ class Lark(Serialize):
self.options.maybe_placeholders
)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))


def _build_parser(self):
self._prepare_callbacks()
parser_class = get_frontend(self.options.parser, self.options.lexer)
parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
return parser_class(self.lexer_conf, parser_conf, options=self.options)


def save(self, f):
"""Saves the instance into the given file object
@@ -410,6 +409,16 @@ class Lark(Serialize):
inst = cls.__new__(cls)
return inst._load(f)


def _deserialize_lexer_conf(self, data, memo, options):
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
lexer_conf.callbacks = options.lexer_callbacks or {}
lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex
return lexer_conf

def _load(self, f, **kwargs):
if isinstance(f, dict):
d = f
@@ -428,16 +437,18 @@ class Lark(Serialize):
self.options = LarkOptions.deserialize(options, memo)
self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
self.source_path = '<deserialized>'
parser_class = get_frontend(self.options.parser, self.options.lexer)
self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
self.terminals = self.lexer_conf.terminals
self._prepare_callbacks()
self.parser = self.parser_class.deserialize(
self._terminals_dict = {t.name: t for t in self.terminals}
self.parser = parser_class.deserialize(
data['parser'],
memo,
self.lexer_conf,
self._callbacks,
self.options, # Not all, but multiple attributes are used
)
self.lexer_conf = self.parser.lexer_conf
self.terminals = self.parser.lexer_conf.terminals
self._terminals_dict = {t.name: t for t in self.terminals}
return self


@classmethod


+ 1
- 12
lark/parser_frontends.py View File

@@ -40,21 +40,10 @@ class MakeParsingFrontend:
return ParsingFrontend(lexer_conf, parser_conf, options)


@classmethod
def deserialize(cls, data, memo, callbacks, options):
lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
def deserialize(cls, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks

terminals = [item for item in memo.values() if isinstance(item, TerminalDef)]

lexer_conf.callbacks = _get_lexer_callbacks(options.transformer, terminals)
lexer_conf.re_module = regex if options.regex else re
lexer_conf.use_bytes = options.use_bytes
lexer_conf.g_regex_flags = options.g_regex_flags
lexer_conf.skip_validation = True
lexer_conf.postlex = options.postlex

return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)






+ 1
- 1
lark/parsers/lalr_parser.py View File

@@ -129,7 +129,7 @@ class ParserState(object):
# shift once and return
assert not is_end
state_stack.append(arg)
value_stack.append(token)
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
return
else:
# reduce+shift as many times as necessary


+ 34
- 6
tests/test_cache.py View File

@@ -1,9 +1,8 @@
from __future__ import absolute_import


import sys
from unittest import TestCase, main


from lark import Lark, Tree
from lark import Lark, Tree, Transformer
from lark.lexer import Lexer, Token
import lark.lark as lark_module


@@ -12,8 +11,6 @@ try:
except ImportError:
from io import BytesIO as StringIO


import tempfile, os



class MockFile(StringIO):
def close(self):
@@ -48,6 +45,18 @@ class CustomLexer(Lexer):
yield Token('A', obj)




class TestT(Transformer):
def add(self, children):
return sum(children if isinstance(children, list) else children.children)

def NUM(self, token):
return int(token)


def append_zero(t):
return t.update(value=t.value + '0')


class TestCache(TestCase):
def setUp(self):
pass
@@ -73,7 +82,7 @@ class TestCache(TestCase):
parser = Lark(g, parser='lalr', cache=True)
assert parser.parse('a') == Tree('start', [])


parser = Lark(g+' "b"', parser='lalr', cache=True)
parser = Lark(g + ' "b"', parser='lalr', cache=True)
assert len(mock_fs.files) == 2
assert parser.parse('ab') == Tree('start', [])


@@ -92,10 +101,29 @@ class TestCache(TestCase):
Lark(g, parser="lalr", debug=True, cache=True)
parser = Lark(g, parser="lalr", debug=True, cache=True)
assert parser.options.options['debug']

# Test inline transformer (tree-less) & lexer_callbacks
mock_fs.files = {}
g = """
start: add+
add: NUM "+" NUM
NUM: /\d+/
%ignore " "
"""
text = "1+2 3+4"
expected = Tree('start', [30, 70])

parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero})
res0 = parser.parse(text)
parser = Lark(g, parser='lalr', transformer=TestT(), cache=True, lexer_callbacks={'NUM': append_zero})
assert len(mock_fs.files) == 1
res1 = parser.parse(text)
res2 = TestT().transform(Lark(g, parser="lalr", cache=True, lexer_callbacks={'NUM': append_zero}).parse(text))
assert res0 == res1 == res2 == expected

finally:
lark_module.FS = fs





if __name__ == '__main__':
main()

+ 24
- 2
tests/test_parser.py View File

@@ -10,7 +10,7 @@ from copy import copy, deepcopy


from lark.utils import Py36, isascii


from lark import Token
from lark import Token, Transformer_NonRecursive


try:
from cStringIO import StringIO as cStringIO
@@ -34,7 +34,7 @@ from lark import logger
from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.visitors import Transformer, Transformer_InPlace, v_args, Transformer_InPlaceRecursive
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter
@@ -162,6 +162,28 @@ class TestParsers(unittest.TestCase):
r = p.parse("x")
self.assertEqual( r.children, ["X!"] )


def test_visit_tokens2(self):
g = """
start: add+
add: NUM "+" NUM
NUM: /\d+/
%ignore " "
"""
text = "1+2 3+4"
expected = Tree('start', [3, 7])
for base in (Transformer, Transformer_InPlace, Transformer_NonRecursive, Transformer_InPlaceRecursive):
class T(base):
def add(self, children):
return sum(children if isinstance(children, list) else children.children)
def NUM(self, token):
return int(token)
parser = Lark(g, parser='lalr', transformer=T())
result = parser.parse(text)
self.assertEqual(result, expected)

def test_vargs_meta(self):


@v_args(meta=True)


Loading…
Cancel
Save