Browse Source

Changed parser/lexer interface in lark. Bumped minor version

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
a5a20a423a
11 changed files with 73 additions and 32 deletions
  1. +2
    -2
      examples/calc.py
  2. +1
    -1
      examples/conf.py
  3. +1
    -1
      examples/conf_nolex.py
  4. +3
    -2
      examples/indented_tree.py
  5. +1
    -1
      lark/__init__.py
  6. +19
    -6
      lark/lark.py
  7. +3
    -1
      lark/lexer.py
  8. +26
    -7
      lark/parser_frontends.py
  9. +5
    -5
      lark/reconstruct.py
  10. +1
    -1
      tests/__main__.py
  11. +11
    -5
      tests/test_parser.py

+ 2
- 2
examples/calc.py View File

@@ -22,13 +22,13 @@ calc_grammar = """
| product "*" atom -> mul
| product "/" atom -> div

?atom: DECIMAL -> number
?atom: NUMBER -> number
| "-" atom -> neg
| NAME -> var
| "(" sum ")"

%import common.CNAME -> NAME
%import common.DECIMAL
%import common.NUMBER
%import common.WS_INLINE

%ignore WS_INLINE


+ 1
- 1
examples/conf.py View File

@@ -26,7 +26,7 @@ parser = Lark(r"""

%ignore /[\t \f]+/
%ignore /\#[^\n]*/
""", parser="lalr_contextual_lexer")
""", parser="lalr", lexer="contextual")


sample_conf = """


+ 1
- 1
examples/conf_nolex.py View File

@@ -24,7 +24,7 @@ parser = Lark(r"""

_CR : /\r/
_LF : /\n/
""", parser="earley_nolex")
""", lexer=None)

class RestoreTokens(Transformer):
value = ''.join


+ 3
- 2
examples/indented_tree.py View File

@@ -16,9 +16,10 @@ tree_grammar = r"""

tree: NAME _NL [_INDENT tree+ _DEDENT]

NAME: /\w+/
%import common.CNAME -> NAME
%import common.WS_INLINE
%ignore WS_INLINE

WS.ignore: /\s+/
_NL: /(\r?\n[\t ]*)+/
_INDENT: "<INDENT>"
_DEDENT: "<DEDENT>"


+ 1
- 1
lark/__init__.py View File

@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
from .lark import Lark
from .utils import inline_args

__version__ = "0.1.2"
__version__ = "0.2.0"

+ 19
- 6
lark/lark.py View File

@@ -11,7 +11,7 @@ from .common import GrammarError, LexerConf, ParserConf

from .lexer import Lexer
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ENGINE_DICT
from .parser_frontends import get_frontend

class LarkOptions(object):
"""Specifies the options for Lark
@@ -19,7 +19,13 @@ class LarkOptions(object):
"""
OPTIONS_DOC = """
parser - Which parser engine to use ("earley" or "lalr". Default: "earley")
Note: Both will use Lark's lexer.
Note: "lalr" requires a lexer
lexer - Whether or not to use a lexer stage
None: Don't use a lexer
"standard": Use a standard lexer
"contextual": Stronger lexer (only works with parser="lalr")
"auto" (default): Choose for me based on grammar and parser

transformer - Applies the transformer to every parse tree
debug - Affects verbosity (default: False)
only_lex - Don't build a parser. Useful for debugging (default: False)
@@ -40,11 +46,12 @@ class LarkOptions(object):
self.cache_grammar = o.pop('cache_grammar', False)
self.postlex = o.pop('postlex', None)
self.parser = o.pop('parser', 'earley')
self.lexer = o.pop('lexer', 'auto')
self.transformer = o.pop('transformer', None)
self.start = o.pop('start', 'start')
self.profile = o.pop('profile', False)

assert self.parser in ENGINE_DICT
# assert self.parser in ENGINE_DICT
if self.parser == 'earley' and self.transformer:
raise ValueError('Cannot specify an auto-transformer when using the Earley algorithm.'
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)')
@@ -118,9 +125,15 @@ class Lark:

self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

if not self.options.only_lex:
if self.options.lexer == 'auto':
if self.options.parser == 'lalr':
self.options.lexer = 'standard'
elif self.options.parser == 'earley':
self.options.lexer = 'standard'

if self.options.parser:
self.parser = self._build_parser()
else:
elif self.options.lexer:
self.lexer = self._build_lexer()

if self.profiler: self.profiler.enter_section('outside_lark')
@@ -131,7 +144,7 @@ class Lark:
return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)

def _build_parser(self):
self.parser_class = ENGINE_DICT[self.options.parser]
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class)
rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer)
if self.profiler:


+ 3
- 1
lark/lexer.py View File

@@ -88,7 +88,9 @@ class Lexer(object):
raise LexError("Cannot compile token: %s: %s" % (t.name, t.pattern))

token_names = {t.name for t in tokens}
assert all(t in token_names for t in ignore)
for t in ignore:
if t not in token_names:
raise LexError("Token '%s' was marked to ignore but it is not defined!" % t)

# Init
self.newline_types = [t.name for t in tokens if _regexp_has_newline(t.pattern.to_regexp())]


+ 26
- 7
lark/parser_frontends.py View File

@@ -131,7 +131,7 @@ class Earley_NoLex:
def _prepare_expansion(self, expansion):
for sym in expansion:
if is_terminal(sym):
regexp = self.token_by_name[sym].to_regexp()
regexp = self.token_by_name[sym].pattern.to_regexp()
width = sre_parse.parse(regexp).getwidth()
if not width == (1,1):
raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
@@ -144,9 +144,28 @@ class Earley_NoLex:
assert len(res) ==1 , 'Ambiguous Parse! Not handled yet'
return res[0]

ENGINE_DICT = {
'lalr': LALR,
'earley': Earley,
'earley_nolex': Earley_NoLex,
'lalr_contextual_lexer': LALR_ContextualLexer
}

def get_frontend(parser, lexer):
if parser=='lalr':
if lexer is None:
raise ValueError('The LALR parser requires use of a lexer')
elif lexer == 'standard':
return LALR
elif lexer == 'contextual':
return LALR_ContextualLexer
else:
raise ValueError('Unknown lexer: %s' % lexer)
elif parser=='earley':
if lexer is None:
return Earley_NoLex
elif lexer=='standard':
return Earley
elif lexer=='contextual':
raise ValueError('The Earley parser does not support the contextual lexer')
else:
raise ValueError('Unknown lexer: %s' % lexer)
else:
raise ValueError('Unknown parser: %s' % parser)




+ 5
- 5
lark/reconstruct.py View File

@@ -2,8 +2,8 @@ import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal, ParserConf
from .lexer import Token, TokenDef__Str
from .common import is_terminal, ParserConf, PatternStr
from .lexer import Token
from .parsers import earley
from .lark import Lark

@@ -22,7 +22,7 @@ def is_iter_empty(i):
class Reconstructor:
def __init__(self, parser):
tokens = {t.name:t for t in parser.lexer_conf.tokens}
token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens}
token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}

class MatchData:
def __init__(self, data):
@@ -50,8 +50,8 @@ class Reconstructor:
for sym in self.expansion:
if is_discarded_terminal(sym):
t = tokens[sym]
assert isinstance(t, TokenDef__Str)
to_write.append(t.value)
assert isinstance(t.pattern, PatternStr)
to_write.append(t.pattern.value)
else:
x = next(args2)
if isinstance(x, list):


+ 1
- 1
tests/__main__.py View File

@@ -5,7 +5,7 @@ import logging

from .test_trees import TestTrees
# from .test_selectors import TestSelectors
from .test_parser import TestLalr, TestEarley, TestLalr_contextual_lexer, TestParsers
from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers
# from .test_grammars import TestPythonG, TestConfigG

logging.basicConfig(level=logging.INFO)


+ 11
- 5
tests/test_parser.py View File

@@ -42,9 +42,9 @@ class TestParsers(unittest.TestCase):
class TestEarley(unittest.TestCase):
pass

def _make_parser_test(PARSER):
def _make_parser_test(LEXER, PARSER):
def _Lark(grammar, **kwargs):
return Lark(grammar, parser=PARSER, **kwargs)
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)
class _TestParser(unittest.TestCase):
def test_basic1(self):
g = _Lark("""start: a+ b a* "b" a*
@@ -397,12 +397,18 @@ def _make_parser_test(PARSER):
g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")

_NAME = "Test" + PARSER.capitalize()
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'None').capitalize()
_TestParser.__name__ = _NAME
globals()[_NAME] = _TestParser

for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']:
_make_parser_test(PARSER)
_TO_TEST = [
('standard', 'earley'),
('standard', 'lalr'),
('contextual', 'lalr'),
]

for LEXER, PARSER in _TO_TEST:
_make_parser_test(LEXER, PARSER)


if __name__ == '__main__':


Loading…
Cancel
Save