Browse Source

Added custom lexer to earley.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
MegaIng1 4 years ago
parent
commit
cf25c06420
3 changed files with 48 additions and 29 deletions
  1. +16
    -6
      lark/parser_frontends.py
  2. +1
    -14
      tests/__main__.py
  3. +31
    -9
      tests/test_parser.py

+ 16
- 6
lark/parser_frontends.py View File

@@ -28,10 +28,7 @@ def get_frontend(parser, lexer):
self.lexer = lexer(lexer_conf)
def lex(self, lexer_state, parser_state):
return self.lexer.lex(lexer_state.text)

class LALR_CustomLexerWrapper(LALR_WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
super(LALR_CustomLexerWrapper, self).__init__(lexer_conf, parser_conf, options=options)
def init_lexer(self):
future_interface = getattr(lexer, '__future_interface__', False)
if future_interface:
@@ -44,13 +41,19 @@ def get_frontend(parser, lexer):
raise ValueError('Unknown lexer: %s' % lexer)
elif parser=='earley':
if lexer=='standard':
return Earley
return Earley_Traditional
elif lexer=='dynamic':
return XEarley
elif lexer=='dynamic_complete':
return XEarley_CompleteLex
elif lexer=='contextual':
raise ValueError('The Earley parser does not support the contextual parser')
elif issubclass(lexer, Lexer):
assert not getattr(lexer, '__future_interface__', False), "Earley doesn't support the future interface right now"
class Earley_CustomLexerWrapper(Earley_WithLexer):
def init_lexer(self, **kw):
self.lexer = lexer(self.lexer_conf)
return Earley_CustomLexerWrapper
else:
raise ValueError('Unknown lexer: %s' % lexer)
elif parser == 'cyk':
@@ -163,10 +166,10 @@ class LALR_ContextualLexer(LALR_WithLexer):
###}


class Earley(WithLexer):
class Earley_WithLexer(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf, parser_conf, options)
self.init_traditional_lexer()
self.init_lexer()

resolve_ambiguity = options.ambiguity == 'resolve'
debug = options.debug if options else False
@@ -179,6 +182,13 @@ class Earley(WithLexer):
def match(self, term, token):
return term.name == token.type

def init_lexer(self, **kw):
raise NotImplementedError()

class Earley_Traditional(Earley_WithLexer):
def init_lexer(self, **kw):
self.init_traditional_lexer()


class XEarley(_ParserFrontend):
def __init__(self, lexer_conf, parser_conf, options=None, **kw):


+ 1
- 14
tests/__main__.py View File

@@ -20,20 +20,7 @@ except ImportError:

from .test_logger import Testlogger

from .test_parser import (
TestLalrStandard,
TestEarleyStandard,
TestCykStandard,
TestLalrContextual,
TestEarleyDynamic,
TestLalrCustom,

# TestFullEarleyStandard,
TestFullEarleyDynamic,
TestFullEarleyDynamic_complete,

TestParsers,
)
from .test_parser import * # We define __all__ to list which TestSuites to run

logger.setLevel(logging.INFO)



+ 31
- 9
tests/test_parser.py View File

@@ -39,8 +39,7 @@ from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer
from lark.indenter import Indenter

logger.setLevel(logging.INFO)

__all__ = ['TestParsers']

__path__ = os.path.dirname(__file__)
def _read(n, *args):
@@ -856,8 +855,9 @@ def _make_full_earley_test(LEXER):
_NAME = "TestFullEarley" + LEXER.capitalize()
_TestFullEarley.__name__ = _NAME
globals()[_NAME] = _TestFullEarley
__all__.append(_NAME)

class CustomLexer(Lexer):
class CustomLexerNew(Lexer):
"""
Purpose of this custom lexer is to test the integration,
so it uses the traditionalparser as implementation without custom lexing behaviour.
@@ -868,6 +868,18 @@ class CustomLexer(Lexer):
return self.lexer.lex(*args, **kwargs)
__future_interface__ = True
class CustomLexerOld(Lexer):
"""
Purpose of this custom lexer is to test the integration,
so it uses the traditionalparser as implementation without custom lexing behaviour.
"""
def __init__(self, lexer_conf):
self.lexer = TraditionalLexer(copy(lexer_conf))
def lex(self, *args, **kwargs):
return self.lexer.lex(*args, **kwargs)
__future_interface__ = False

def _tree_structure_check(a, b):
"""
@@ -941,12 +953,18 @@ class DualBytesLark:
self.bytes_lark.load(f)

def _make_parser_test(LEXER, PARSER):
lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
if LEXER == 'custom_new':
lexer_class_or_name = CustomLexerNew
elif LEXER == 'custom_old':
lexer_class_or_name = CustomLexerOld
else:
lexer_class_or_name = LEXER
def _Lark(grammar, **kwargs):
return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
def _Lark_open(gfilename, **kwargs):
return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)


class _TestParser(unittest.TestCase):
def test_basic1(self):
g = _Lark("""start: a+ b a* "b" a*
@@ -1992,7 +2010,7 @@ def _make_parser_test(LEXER, PARSER):



@unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
@unittest.skipIf(PARSER != 'earley' or 'dynamic' not in LEXER, "Currently only Earley supports priority sum in rules")
def test_prioritization_sum(self):
"Tests effect of priority on result"

@@ -2296,7 +2314,7 @@ def _make_parser_test(LEXER, PARSER):
parser = _Lark(grammar)


@unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
@unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
def test_serialize(self):
grammar = """
start: _ANY b "C"
@@ -2400,17 +2418,21 @@ def _make_parser_test(LEXER, PARSER):
_TestParser.__name__ = _NAME
_TestParser.__qualname__ = "tests.test_parser." + _NAME
globals()[_NAME] = _TestParser
__all__.append(_NAME)

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
('standard', 'earley'),
('standard', 'cyk'),
('standard', 'lalr'),
('dynamic', 'earley'),
('dynamic_complete', 'earley'),
('standard', 'lalr'),
('contextual', 'lalr'),
('custom', 'lalr'),
# (None, 'earley'),
('custom_new', 'lalr'),
('custom_old', 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:


Loading…
Cancel
Save