Browse Source

Added custom lexer to earley.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
MegaIng1 4 years ago
parent
commit
cf25c06420
3 changed files with 48 additions and 29 deletions
  1. +16
    -6
      lark/parser_frontends.py
  2. +1
    -14
      tests/__main__.py
  3. +31
    -9
      tests/test_parser.py

+ 16
- 6
lark/parser_frontends.py View File

@@ -28,10 +28,7 @@ def get_frontend(parser, lexer):
self.lexer = lexer(lexer_conf)
def lex(self, lexer_state, parser_state):
return self.lexer.lex(lexer_state.text)

class LALR_CustomLexerWrapper(LALR_WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
super(LALR_CustomLexerWrapper, self).__init__(lexer_conf, parser_conf, options=options)
def init_lexer(self):
future_interface = getattr(lexer, '__future_interface__', False)
if future_interface:
@@ -44,13 +41,19 @@ def get_frontend(parser, lexer):
raise ValueError('Unknown lexer: %s' % lexer)
elif parser=='earley':
if lexer=='standard':
return Earley
return Earley_Traditional
elif lexer=='dynamic':
return XEarley
elif lexer=='dynamic_complete':
return XEarley_CompleteLex
elif lexer=='contextual':
raise ValueError('The Earley parser does not support the contextual parser')
elif issubclass(lexer, Lexer):
assert not getattr(lexer, '__future_interface__', False), "Earley doesn't support the future interface right now"
class Earley_CustomLexerWrapper(Earley_WithLexer):
def init_lexer(self, **kw):
self.lexer = lexer(self.lexer_conf)
return Earley_CustomLexerWrapper
else:
raise ValueError('Unknown lexer: %s' % lexer)
elif parser == 'cyk':
@@ -163,10 +166,10 @@ class LALR_ContextualLexer(LALR_WithLexer):
###}




class Earley(WithLexer):
class Earley_WithLexer(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf, parser_conf, options)
self.init_traditional_lexer()
self.init_lexer()


resolve_ambiguity = options.ambiguity == 'resolve'
debug = options.debug if options else False
@@ -179,6 +182,13 @@ class Earley(WithLexer):
def match(self, term, token):
return term.name == token.type


def init_lexer(self, **kw):
raise NotImplementedError()

class Earley_Traditional(Earley_WithLexer):
def init_lexer(self, **kw):
self.init_traditional_lexer()



class XEarley(_ParserFrontend):
def __init__(self, lexer_conf, parser_conf, options=None, **kw):


+ 1
- 14
tests/__main__.py View File

@@ -20,20 +20,7 @@ except ImportError:


from .test_logger import Testlogger


from .test_parser import (
TestLalrStandard,
TestEarleyStandard,
TestCykStandard,
TestLalrContextual,
TestEarleyDynamic,
TestLalrCustom,

# TestFullEarleyStandard,
TestFullEarleyDynamic,
TestFullEarleyDynamic_complete,

TestParsers,
)
from .test_parser import * # We define __all__ to list which TestSuites to run


logger.setLevel(logging.INFO)




+ 31
- 9
tests/test_parser.py View File

@@ -39,8 +39,7 @@ from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter


logger.setLevel(logging.INFO)

__all__ = ['TestParsers']


__path__ = os.path.dirname(__file__)
def _read(n, *args):
@@ -856,8 +855,9 @@ def _make_full_earley_test(LEXER):
_NAME = "TestFullEarley" + LEXER.capitalize()
_TestFullEarley.__name__ = _NAME
globals()[_NAME] = _TestFullEarley
__all__.append(_NAME)


class CustomLexer(Lexer):
class CustomLexerNew(Lexer):
"""
Purpose of this custom lexer is to test the integration,
so it uses the traditionalparser as implementation without custom lexing behaviour.
@@ -868,6 +868,18 @@ class CustomLexer(Lexer):
return self.lexer.lex(*args, **kwargs)
__future_interface__ = True
class CustomLexerOld(Lexer):
"""
Purpose of this custom lexer is to test the integration,
so it uses the traditionalparser as implementation without custom lexing behaviour.
"""
def __init__(self, lexer_conf):
self.lexer = TraditionalLexer(copy(lexer_conf))
def lex(self, *args, **kwargs):
return self.lexer.lex(*args, **kwargs)
__future_interface__ = False


def _tree_structure_check(a, b):
"""
@@ -941,12 +953,18 @@ class DualBytesLark:
self.bytes_lark.load(f)


def _make_parser_test(LEXER, PARSER):
lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
if LEXER == 'custom_new':
lexer_class_or_name = CustomLexerNew
elif LEXER == 'custom_old':
lexer_class_or_name = CustomLexerOld
else:
lexer_class_or_name = LEXER
def _Lark(grammar, **kwargs):
return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
def _Lark_open(gfilename, **kwargs):
return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)



class _TestParser(unittest.TestCase):
def test_basic1(self):
g = _Lark("""start: a+ b a* "b" a*
@@ -1992,7 +2010,7 @@ def _make_parser_test(LEXER, PARSER):






@unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
@unittest.skipIf(PARSER != 'earley' or 'dynamic' not in LEXER, "Currently only Earley supports priority sum in rules")
def test_prioritization_sum(self): def test_prioritization_sum(self):
"Tests effect of priority on result" "Tests effect of priority on result"


@@ -2296,7 +2314,7 @@ def _make_parser_test(LEXER, PARSER):
parser = _Lark(grammar)




@unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
@unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
def test_serialize(self):
grammar = """
start: _ANY b "C"
@@ -2400,17 +2418,21 @@ def _make_parser_test(LEXER, PARSER):
_TestParser.__name__ = _NAME
_TestParser.__qualname__ = "tests.test_parser." + _NAME
globals()[_NAME] = _TestParser
__all__.append(_NAME)


# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
('standard', 'earley'),
('standard', 'cyk'),
('standard', 'lalr'),
('dynamic', 'earley'),
('dynamic_complete', 'earley'),
('standard', 'lalr'),
('contextual', 'lalr'),
('custom', 'lalr'),
# (None, 'earley'),
('custom_new', 'lalr'),
('custom_old', 'earley'),
]


for _LEXER, _PARSER in _TO_TEST:


Loading…
Cancel
Save