Merge pull request #776 from MegaIng/earley_custom

Erez Shinan committed via GitHub
commit 8379d784ec
3 changed files with 80 additions and 58 deletions:
  1. lark/parser_frontends.py (+30, -17)
  2. tests/__main__.py (+1, -14)
  3. tests/test_parser.py (+49, -27)

lark/parser_frontends.py (+30, -17)

@@ -14,6 +14,18 @@ import re


 ###{standalone

+def _wrap_lexer(lexer_class):
+    future_interface = getattr(lexer_class, '__future_interface__', False)
+    if future_interface:
+        return lexer_class
+    else:
+        class CustomLexerWrapper(Lexer):
+            def __init__(self, lexer_conf):
+                self.lexer = lexer_class(lexer_conf)
+            def lex(self, lexer_state, parser_state):
+                return self.lexer.lex(lexer_state.text)
+        return CustomLexerWrapper
+
 def get_frontend(parser, lexer):
     if parser=='lalr':
         if lexer is None:
@@ -23,34 +35,28 @@ def get_frontend(parser, lexer):
         elif lexer == 'contextual':
             return LALR_ContextualLexer
         elif issubclass(lexer, Lexer):
-            class CustomLexerWrapper(Lexer):
-                def __init__(self, lexer_conf):
-                    self.lexer = lexer(lexer_conf)
-                def lex(self, lexer_state, parser_state):
-                    return self.lexer.lex(lexer_state.text)
-
+            wrapped = _wrap_lexer(lexer)
             class LALR_CustomLexerWrapper(LALR_WithLexer):
-                def __init__(self, lexer_conf, parser_conf, options=None):
-                    super(LALR_CustomLexerWrapper, self).__init__(lexer_conf, parser_conf, options=options)
                 def init_lexer(self):
-                    future_interface = getattr(lexer, '__future_interface__', False)
-                    if future_interface:
-                        self.lexer = lexer(self.lexer_conf)
-                    else:
-                        self.lexer = CustomLexerWrapper(self.lexer_conf)
-
+                    self.lexer = wrapped(self.lexer_conf)
             return LALR_CustomLexerWrapper
         else:
             raise ValueError('Unknown lexer: %s' % lexer)
     elif parser=='earley':
         if lexer=='standard':
-            return Earley
+            return Earley_Traditional
         elif lexer=='dynamic':
             return XEarley
         elif lexer=='dynamic_complete':
             return XEarley_CompleteLex
         elif lexer=='contextual':
             raise ValueError('The Earley parser does not support the contextual parser')
+        elif issubclass(lexer, Lexer):
+            wrapped = _wrap_lexer(lexer)
+            class Earley_CustomLexerWrapper(Earley_WithLexer):
+                def init_lexer(self, **kw):
+                    self.lexer = wrapped(self.lexer_conf)
+            return Earley_CustomLexerWrapper
         else:
             raise ValueError('Unknown lexer: %s' % lexer)
     elif parser == 'cyk':
@@ -163,10 +169,10 @@ class LALR_ContextualLexer(LALR_WithLexer):
 ###}


-class Earley(WithLexer):
+class Earley_WithLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         WithLexer.__init__(self, lexer_conf, parser_conf, options)
-        self.init_traditional_lexer()
+        self.init_lexer()

         resolve_ambiguity = options.ambiguity == 'resolve'
         debug = options.debug if options else False
@@ -176,6 +182,13 @@ class Earley(WithLexer):
     def match(self, term, token):
         return term.name == token.type

+    def init_lexer(self, **kw):
+        raise NotImplementedError()
+
+class Earley_Traditional(Earley_WithLexer):
+    def init_lexer(self, **kw):
+        self.init_traditional_lexer()
+

 class XEarley(_ParserFrontend):
     def __init__(self, lexer_conf, parser_conf, options=None, **kw):
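
What this change enables, end to end: before this PR, passing a custom lexer class together with parser='earley' fell through to the "Unknown lexer" branch; now the class is adapted by _wrap_lexer and wired up like any built-in lexer. A minimal sketch (the WordLexer class and the grammar are illustrative only, not part of the PR, and untested):

    from lark import Lark, Token
    from lark.lexer import Lexer

    class WordLexer(Lexer):
        # Hypothetical custom lexer: split on whitespace, emit WORD tokens.
        __future_interface__ = False  # legacy interface: lex(text)

        def __init__(self, lexer_conf):
            pass  # a real lexer would read terminal definitions from lexer_conf

        def lex(self, text):
            for word in text.split():
                yield Token('WORD', word)

    grammar = '''
    start: WORD+
    %declare WORD
    '''

    # Before this PR: ValueError('Unknown lexer: ...'); after it, this parses.
    parser = Lark(grammar, parser='earley', lexer=WordLexer)
    print(parser.parse("hello world").children)
    # expected: [Token('WORD', 'hello'), Token('WORD', 'world')]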


tests/__main__.py (+1, -14)

@@ -21,20 +21,7 @@ except ImportError:

 from .test_logger import Testlogger

-from .test_parser import (
-        TestLalrStandard,
-        TestEarleyStandard,
-        TestCykStandard,
-        TestLalrContextual,
-        TestEarleyDynamic,
-        TestLalrCustom,
-
-        # TestFullEarleyStandard,
-        TestFullEarleyDynamic,
-        TestFullEarleyDynamic_complete,
-
-        TestParsers,
-)
+from .test_parser import *  # We define __all__ to list which TestSuites to run

 logger.setLevel(logging.INFO)
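
The star import works because tests/test_parser.py now curates __all__ itself as it generates its suites (see the next file). A standalone sketch of that registration pattern, with hypothetical module and test names:

    # suites.py -- hypothetical module demonstrating the pattern
    import unittest

    __all__ = []  # controls what "from suites import *" exposes

    def _make_suite(tag):
        class _T(unittest.TestCase):
            def test_smoke(self):
                self.assertTrue(True)
        name = 'Test' + tag.capitalize()
        _T.__name__ = name
        globals()[name] = _T    # publish the generated class under its name...
        __all__.append(name)    # ...and register it for star imports

    for tag in ['alpha', 'beta']:
        _make_suite(tag)

    # In the runner module:
    #     from suites import *   # picks up TestAlpha and TestBeta
    #     unittest.main()        # unittest finds them in the importer's globals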




tests/test_parser.py (+49, -27)

@@ -39,8 +39,7 @@ from lark.grammar import Rule
 from lark.lexer import TerminalDef, Lexer, TraditionalLexer
 from lark.indenter import Indenter

-logger.setLevel(logging.INFO)
-
+__all__ = ['TestParsers']

 __path__ = os.path.dirname(__file__)
 def _read(n, *args):
@@ -856,18 +855,32 @@ def _make_full_earley_test(LEXER):
     _NAME = "TestFullEarley" + LEXER.capitalize()
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
+    __all__.append(_NAME)


-class CustomLexer(Lexer):
+class CustomLexerNew(Lexer):
     """
        Purpose of this custom lexer is to test the integration,
        so it uses the traditionalparser as implementation without custom lexing behaviour.
     """
     def __init__(self, lexer_conf):
         self.lexer = TraditionalLexer(copy(lexer_conf))
-    def lex(self, *args, **kwargs):
-        return self.lexer.lex(*args, **kwargs)
+    def lex(self, lexer_state, parser_state):
+        return self.lexer.lex(lexer_state, parser_state)
     __future_interface__ = True
+
+class CustomLexerOld(Lexer):
+    """
+       Purpose of this custom lexer is to test the integration,
+       so it uses the traditionalparser as implementation without custom lexing behaviour.
+    """
+    def __init__(self, lexer_conf):
+        self.lexer = TraditionalLexer(copy(lexer_conf))
+    def lex(self, text):
+        ls = self.lexer.make_lexer_state(text)
+        return self.lexer.lex(ls, None)
+    __future_interface__ = False


 def _tree_structure_check(a, b):
     """
@@ -941,12 +954,18 @@ class DualBytesLark:
             self.bytes_lark.load(f)


 def _make_parser_test(LEXER, PARSER):
-    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
+    if LEXER == 'custom_new':
+        lexer_class_or_name = CustomLexerNew
+    elif LEXER == 'custom_old':
+        lexer_class_or_name = CustomLexerOld
+    else:
+        lexer_class_or_name = LEXER
     def _Lark(grammar, **kwargs):
         return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
     def _Lark_open(gfilename, **kwargs):
         return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

     class _TestParser(unittest.TestCase):
         def test_basic1(self):
             g = _Lark("""start: a+ b a* "b" a*
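
The two test lexers above pin down both calling conventions: CustomLexerNew implements the new "future" interface, lex(lexer_state, parser_state), while CustomLexerOld keeps the legacy lex(text) and bridges to TraditionalLexer via make_lexer_state. How _wrap_lexer (from the parser_frontends.py hunk above) should treat each, sketched as assertions (untested):

    from lark.parser_frontends import _wrap_lexer  # added by this PR

    assert _wrap_lexer(CustomLexerNew) is CustomLexerNew
    # __future_interface__ = True: the class passes through untouched.

    wrapped = _wrap_lexer(CustomLexerOld)
    assert wrapped is not CustomLexerOld
    # __future_interface__ = False: an adapter class is returned whose
    # lex(lexer_state, parser_state) calls CustomLexerOld.lex(lexer_state.text),
    # so legacy lexers keep working behind the new calling convention.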
@@ -1502,7 +1521,7 @@ def _make_parser_test(LEXER, PARSER):
             %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

     def test_float_without_lexer(self):
-        expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
+        expected_error = UnexpectedCharacters if 'dynamic' in LEXER else UnexpectedToken
         if PARSER == 'cyk':
             expected_error = ParseError
@@ -1635,13 +1654,13 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(d.line, 2)
         self.assertEqual(d.column, 2)

-        if LEXER != 'dynamic':
-            self.assertEqual(a.end_line, 1)
-            self.assertEqual(a.end_column, 2)
-            self.assertEqual(bc.end_line, 2)
-            self.assertEqual(bc.end_column, 2)
-            self.assertEqual(d.end_line, 2)
-            self.assertEqual(d.end_column, 3)
+        # if LEXER != 'dynamic':
+        self.assertEqual(a.end_line, 1)
+        self.assertEqual(a.end_column, 2)
+        self.assertEqual(bc.end_line, 2)
+        self.assertEqual(bc.end_column, 2)
+        self.assertEqual(d.end_line, 2)
+        self.assertEqual(d.end_column, 3)

@@ -1872,7 +1891,7 @@ def _make_parser_test(LEXER, PARSER):
         """
         self.assertRaises(IOError, _Lark, grammar)

-    @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
+    @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic")
     def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
         class TestPostLexer:
             def process(self, stream):
@@ -1895,7 +1914,7 @@ def _make_parser_test(LEXER, PARSER):
         tree = parser.parse(test_file)
         self.assertEqual(tree.children, [Token('B', 'A')])

-    @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
+    @unittest.skipIf('dynamic' in LEXER, "%declare/postlex doesn't work with dynamic")
     def test_postlex_indenter(self):
         class CustomIndenter(Indenter):
             NL_type = 'NEWLINE'
@@ -1992,7 +2011,7 @@ def _make_parser_test(LEXER, PARSER):


-    @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
+    @unittest.skipIf(PARSER != 'earley' or 'dynamic' not in LEXER, "Currently only Earley supports priority sum in rules")
     def test_prioritization_sum(self):
         "Tests effect of priority on result"

@@ -2203,9 +2222,9 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(tok, text)
         self.assertEqual(tok.line, 1)
         self.assertEqual(tok.column, 1)
-        if _LEXER != 'dynamic':
-            self.assertEqual(tok.end_line, 2)
-            self.assertEqual(tok.end_column, 6)
+        # if _LEXER != 'dynamic':
+        self.assertEqual(tok.end_line, 2)
+        self.assertEqual(tok.end_column, 6)

     @unittest.skipIf(PARSER=='cyk', "Empty rules")
     def test_empty_end(self):
@@ -2296,7 +2315,7 @@ def _make_parser_test(LEXER, PARSER):
         parser = _Lark(grammar)


-    @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
+    @unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
     def test_serialize(self):
         grammar = """
         start: _ANY b "C"
@@ -2342,7 +2361,7 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(a.line, 1)
         self.assertEqual(b.line, 2)

-    @unittest.skipIf(PARSER=='cyk', "match_examples() not supported for CYK")
+    @unittest.skipIf(PARSER=='cyk' or LEXER=='custom_old', "match_examples() not supported for CYK/old custom lexer")
     def test_match_examples(self):
         p = _Lark(r"""
             start: "a" "b" "c"
@@ -2425,17 +2444,20 @@ def _make_parser_test(LEXER, PARSER):
     _TestParser.__name__ = _NAME
     _TestParser.__qualname__ = "tests.test_parser." + _NAME
     globals()[_NAME] = _TestParser
+    __all__.append(_NAME)


-# Note: You still have to import them in __main__ for the tests to run
 _TO_TEST = [
         ('standard', 'earley'),
         ('standard', 'cyk'),
+        ('standard', 'lalr'),
         ('dynamic', 'earley'),
         ('dynamic_complete', 'earley'),
-        ('standard', 'lalr'),
         ('contextual', 'lalr'),
-        ('custom', 'lalr'),
-        # (None, 'earley'),
+        ('custom_new', 'lalr'),
+        ('custom_old', 'earley'),
 ]

 for _LEXER, _PARSER in _TO_TEST:
