diff --git a/lark/common.py b/lark/common.py
index ff1897a..1717fe7 100644
--- a/lark/common.py
+++ b/lark/common.py
@@ -15,8 +15,6 @@ class GrammarError(Exception):
 class ParseError(Exception):
     pass
 
-###}
-
 class UnexpectedToken(ParseError):
     def __init__(self, token, expected, seq, index):
         self.token = token
@@ -37,6 +35,8 @@ class UnexpectedToken(ParseError):
 
         super(UnexpectedToken, self).__init__(message)
 
+###}
+
 
 
 class LexerConf:
diff --git a/lark/indenter.py b/lark/indenter.py
index a5f107d..34e61a0 100644
--- a/lark/indenter.py
+++ b/lark/indenter.py
@@ -2,6 +2,7 @@
 
 from .lexer import Token
 
+###{standalone
 class Indenter:
     def __init__(self):
         self.paren_level = 0
@@ -50,3 +51,5 @@ class Indenter:
     @property
     def always_accept(self):
         return (self.NL_type,)
+
+###}
diff --git a/lark/lexer.py b/lark/lexer.py
index 4f673f6..844025d 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -101,25 +101,23 @@ class _Lex:
                 if line_ctr.char_pos < len(stream):
                     raise UnexpectedInput(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column)
                 break
 
-###}
-
-def _regexp_has_newline(r):
-    return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
 
-def _create_unless_callback(strs):
-    mres = build_mres(strs, match_whole=True)
-    def unless_callback(t):
-        # if t in strs:
-        #     t.type = strs[t]
-        for mre, type_from_index in mres:
+class UnlessCallback:
+    def __init__(self, mres):
+        self.mres = mres
+
+    def __call__(self, t):
+        for mre, type_from_index in self.mres:
             m = mre.match(t.value)
             if m:
                 value = m.group(0)
                 t.type = type_from_index[m.lastindex]
                 break
         return t
-    return unless_callback
+
+###}
+
 
 def _create_unless(tokens):
     tokens_by_type = classify(tokens, lambda t: type(t.pattern))
@@ -136,7 +134,7 @@ def _create_unless(tokens):
             if strtok.pattern.flags <= retok.pattern.flags:
                 embedded_strs.add(strtok)
         if unless:
-            callback[retok.name] = _create_unless_callback(unless)
+            callback[retok.name] = UnlessCallback(build_mres(unless, match_whole=True))
 
     tokens = [t for t in tokens if t not in embedded_strs]
     return tokens, callback
@@ -161,7 +159,8 @@ def _build_mres(tokens, max_size, match_whole):
 def build_mres(tokens, match_whole=False):
     return _build_mres(tokens, len(tokens), match_whole)
 
-
+def _regexp_has_newline(r):
+    return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
 
 class Lexer:
     def __init__(self, tokens, ignore=()):
diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py
index 45bd18d..7a9f5a2 100644
--- a/lark/tools/standalone.py
+++ b/lark/tools/standalone.py
@@ -54,6 +54,7 @@ EXTRACT_STANDALONE_FILES = [
     'utils.py',
     'common.py',
     'tree.py',
+    'indenter.py',
     'lexer.py',
     'parse_tree_builder.py',
     'parsers/lalr_parser.py',
@@ -81,22 +82,27 @@ def extract_sections(lines):
 
 class LexerAtoms:
     def __init__(self, lexer):
-        assert not lexer.callback
         self.mres = [(p.pattern,d) for p,d in lexer.mres]
         self.newline_types = lexer.newline_types
         self.ignore_types = lexer.ignore_types
+        self.callback = {name:[(p.pattern,d) for p,d in c.mres]
+                         for name, c in lexer.callback.items()}
 
     def print_python(self):
         print('import re')
         print('MRES = (')
         pprint(self.mres)
         print(')')
+        print('LEXER_CALLBACK = (')
+        pprint(self.callback)
+        print(')')
         print('NEWLINE_TYPES = %s' % self.newline_types)
         print('IGNORE_TYPES = %s' % self.ignore_types)
         print('class LexerRegexps: pass')
         print('lexer_regexps = LexerRegexps()')
         print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]')
-        print('lexer_regexps.callback = {}')
+        print('lexer_regexps.callback = {n: UnlessCallback([(re.compile(p), d) for p, d in mres])')
+        print('                          for n, mres in LEXER_CALLBACK.items()}')
        print('lexer = _Lex(lexer_regexps)')
        print('def lex(stream):')
        print('    return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)')
@@ -132,12 +138,15 @@ class ParserAtoms:
         print('parse_table.start_state = %s' % self.parse_table.start_state)
         print('parse_table.end_state = %s' % self.parse_table.end_state)
         print('class Lark_StandAlone:')
-        print('  def __init__(self, transformer=None):')
+        print('  def __init__(self, transformer=None, postlex=None):')
         print('    callback = parse_tree_builder.create_callback(transformer=transformer)')
         print('    callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES}')
         print('    self.parser = _Parser(parse_table, callbacks)')
+        print('    self.postlex = postlex')
         print('  def parse(self, stream):')
-        print('    return self.parser.parse(lex(stream))')
+        print('    tokens = lex(stream)')
+        print('    if self.postlex: tokens = self.postlex.process(tokens)')
+        print('    return self.parser.parse(tokens)')
 
 class TreeBuilderAtoms:
     def __init__(self, lark):
@@ -152,9 +161,9 @@
         print('RULES = list(RULE_ID.values())')
         print('parse_tree_builder = ParseTreeBuilder(RULES, Tree)')
 
-def main(fn):
+def main(fn, start):
     with codecs.open(fn, encoding='utf8') as f:
-        lark_inst = Lark(f, parser="lalr")
+        lark_inst = Lark(f, parser="lalr", start=start)
 
     lexer_atoms = LexerAtoms(lark_inst.parser.lexer)
     parser_atoms = ParserAtoms(lark_inst.parser.parser)
@@ -175,9 +184,14 @@
 if __name__ == '__main__':
     if len(sys.argv) < 2:
         print("Lark Stand-alone Generator Tool")
-        print("Usage: python -m lark.tools.standalone <grammar-file>")
+        print("Usage: python -m lark.tools.standalone <grammar-file> [<start>]")
         sys.exit(1)
 
-    fn ,= sys.argv[1:]
+    if len(sys.argv) == 3:
+        fn, start = sys.argv[1:]
+    elif len(sys.argv) == 2:
+        fn, start = sys.argv[1], 'start'
+    else:
+        assert False, sys.argv
 
-    main(fn)
+    main(fn, start)
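
A minimal usage sketch of what this change enables, not part of the diff itself. Because indenter.py is added to EXTRACT_STANDALONE_FILES and Indenter is wrapped in ###{standalone ... ###} markers, the generated module carries its own Indenter base class, and Lark_StandAlone now accepts a postlex object whose .process() is applied to the token stream before parsing. The module name "my_parser", the grammar, and the terminal names in MyIndenter are hypothetical; the Indenter attributes follow lark's indenter API.

# Assumes the standalone module was generated with something like:
#   python -m lark.tools.standalone my_grammar.lark start > my_parser.py
import my_parser

class MyIndenter(my_parser.Indenter):
    # Terminal names must match the terminals declared in the grammar
    # (the names below are placeholders for illustration).
    NL_type = '_NEWLINE'
    OPEN_PAREN_types = ['LPAR']
    CLOSE_PAREN_types = ['RPAR']
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

# The new postlex argument routes tokens through MyIndenter.process()
# between the standalone lexer and the LALR parser, emitting the
# _INDENT/_DEDENT tokens an indentation-sensitive grammar expects.
parser = my_parser.Lark_StandAlone(postlex=MyIndenter())
tree = parser.parse('if x:\n    y\n')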