Browse Source

Standalone tool now supports postlex

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.2
Erez Shinan 6 years ago
parent
commit
9b0672fda6
4 changed files with 40 additions and 24 deletions
  1. +2
    -2
      lark/common.py
  2. +3
    -0
      lark/indenter.py
  3. +12
    -13
      lark/lexer.py
  4. +23
    -9
      lark/tools/standalone.py

+ 2
- 2
lark/common.py View File

@@ -15,8 +15,6 @@ class GrammarError(Exception):
class ParseError(Exception):
pass

###}

class UnexpectedToken(ParseError):
def __init__(self, token, expected, seq, index):
self.token = token
@@ -37,6 +35,8 @@ class UnexpectedToken(ParseError):
super(UnexpectedToken, self).__init__(message)


###}



class LexerConf:


+ 3
- 0
lark/indenter.py View File

@@ -2,6 +2,7 @@

from .lexer import Token

###{standalone
class Indenter:
def __init__(self):
self.paren_level = 0
@@ -50,3 +51,5 @@ class Indenter:
@property
def always_accept(self):
return (self.NL_type,)

###}

+ 12
- 13
lark/lexer.py View File

@@ -101,25 +101,23 @@ class _Lex:
if line_ctr.char_pos < len(stream):
raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
break
###}


def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
class UnlessCallback:
def __init__(self, mres):
self.mres = mres

def _create_unless_callback(strs):
mres = build_mres(strs, match_whole=True)
def unless_callback(t):
# if t in strs:
# t.type = strs[t]
for mre, type_from_index in mres:
def __call__(self, t):
for mre, type_from_index in self.mres:
m = mre.match(t.value)
if m:
value = m.group(0)
t.type = type_from_index[m.lastindex]
break
return t
return unless_callback

###}



def _create_unless(tokens):
tokens_by_type = classify(tokens, lambda t: type(t.pattern))
@@ -136,7 +134,7 @@ def _create_unless(tokens):
if strtok.pattern.flags <= retok.pattern.flags:
embedded_strs.add(strtok)
if unless:
callback[retok.name] = _create_unless_callback(unless)
callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))

tokens = [t for t in tokens if t not in embedded_strs]
return tokens, callback
@@ -161,7 +159,8 @@ def _build_mres(tokens, max_size, match_whole):
def build_mres(tokens, match_whole=False):
return _build_mres(tokens, len(tokens), match_whole)


def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)

class Lexer:
def __init__(self, tokens, ignore=()):


+ 23
- 9
lark/tools/standalone.py View File

@@ -54,6 +54,7 @@ EXTRACT_STANDALONE_FILES = [
'utils.py',
'common.py',
'tree.py',
'indenter.py',
'lexer.py',
'parse_tree_builder.py',
'parsers/lalr_parser.py',
@@ -81,22 +82,27 @@ def extract_sections(lines):

class LexerAtoms:
def __init__(self, lexer):
assert not lexer.callback
self.mres = [(p.pattern,d) for p,d in lexer.mres]
self.newline_types = lexer.newline_types
self.ignore_types = lexer.ignore_types
self.callback = {name:[(p.pattern,d) for p,d in c.mres]
for name, c in lexer.callback.items()}

def print_python(self):
print('import re')
print('MRES = (')
pprint(self.mres)
print(')')
print('LEXER_CALLBACK = (')
pprint(self.callback)
print(')')
print('NEWLINE_TYPES = %s' % self.newline_types)
print('IGNORE_TYPES = %s' % self.ignore_types)
print('class LexerRegexps: pass')
print('lexer_regexps = LexerRegexps()')
print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
print('lexer_regexps.callback = {}')
print('lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])')
print(' for n, mres in LEXER_CALLBACK.items()}')
print('lexer = _Lex(lexer_regexps)')
print('def lex(stream):')
print(' return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)')
@@ -132,12 +138,15 @@ class ParserAtoms:
print('parse_table.start_state = %s' % self.parse_table.start_state)
print('parse_table.end_state = %s' % self.parse_table.end_state)
print('class Lark_StandAlone:')
print(' def __init__(self, transformer=None):')
print(' def __init__(self, transformer=None, postlex=None):')
print(' callback = parse_tree_builder.create_callback(transformer=transformer)')
print(' callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES}')
print(' self.parser = _Parser(parse_table, callbacks)')
print(' self.postlex = postlex')
print(' def parse(self, stream):')
print(' return self.parser.parse(lex(stream))')
print(' tokens = lex(stream)')
print(' if self.postlex: tokens = self.postlex.process(tokens)')
print(' return self.parser.parse(tokens)')

class TreeBuilderAtoms:
def __init__(self, lark):
@@ -152,9 +161,9 @@ class TreeBuilderAtoms:
print('RULES = list(RULE_ID.values())')
print('parse_tree_builder = ParseTreeBuilder(RULES, Tree)')

def main(fn):
def main(fn, start):
with codecs.open(fn, encoding='utf8') as f:
lark_inst = Lark(f, parser="lalr")
lark_inst = Lark(f, parser="lalr", start=start)

lexer_atoms = LexerAtoms(lark_inst.parser.lexer)
parser_atoms = ParserAtoms(lark_inst.parser.parser)
@@ -175,9 +184,14 @@ def main(fn):
if __name__ == '__main__':
if len(sys.argv) < 2:
print("Lark Stand-alone Generator Tool")
print("Usage: python -m lark.tools.standalone <grammar-file>")
print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
sys.exit(1)

fn ,= sys.argv[1:]
if len(sys.argv) == 3:
fn, start = sys.argv[1:]
elif len(sys.argv) == 2:
fn, start = sys.argv[1], 'start'
else:
assert False, sys.argv

main(fn)
main(fn, start)

Loading…
Cancel
Save