|
|
@@ -0,0 +1,184 @@ |
|
|
|
###{standalone |
|
|
|
# |
|
|
|
# |
|
|
|
# Lark Stand-alone Generator Tool |
|
|
|
# ---------------------------------- |
|
|
|
# Git: https://github.com/erezsh/lark |
|
|
|
# Author: Erez Shinan (erezshin@gmail.com) |
|
|
|
# |
|
|
|
# |
|
|
|
# >>> LICENSE |
|
|
|
# |
|
|
|
# This tool and its generated code use a separate license from Lark. |
|
|
|
# |
|
|
|
# It is licensed under GPLv2 or above. |
|
|
|
# |
|
|
|
# If you wish to purchase a commercial license for this tool and its |
|
|
|
# generated code, contact me via email. |
|
|
|
# |
|
|
|
# This program is free software: you can redistribute it and/or modify |
|
|
|
# it under the terms of the GNU General Public License as published by |
|
|
|
# the Free Software Foundation, either version 2 of the License, or |
|
|
|
# (at your option) any later version. |
|
|
|
# |
|
|
|
# This program is distributed in the hope that it will be useful, |
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
|
# GNU General Public License for more details. |
|
|
|
# |
|
|
|
# See <http://www.gnu.org/licenses/>. |
|
|
|
# |
|
|
|
# |
|
|
|
###} |
|
|
|
|
|
|
|
import codecs |
|
|
|
import sys |
|
|
|
import os |
|
|
|
from pprint import pprint |
|
|
|
from os import path |
|
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
import lark |
|
|
|
from lark import Lark |
|
|
|
|
|
|
|
from ..grammar import Rule |
|
|
|
|
|
|
|
__dir__ = path.dirname(__file__) |
|
|
|
__larkdir__ = path.join(__dir__, path.pardir) |
|
|
|
|
|
|
|
|
|
|
|
EXTRACT_STANDALONE_FILES = [ |
|
|
|
'tools/standalone.py', |
|
|
|
'utils.py', |
|
|
|
'common.py', |
|
|
|
'tree.py', |
|
|
|
'lexer.py', |
|
|
|
'parse_tree_builder.py', |
|
|
|
'parsers/lalr_parser.py', |
|
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
def extract_sections(lines): |
|
|
|
section = None |
|
|
|
text = [] |
|
|
|
sections = defaultdict(list) |
|
|
|
for l in lines: |
|
|
|
if l.startswith('###'): |
|
|
|
if l[3] == '{': |
|
|
|
section = l[4:].strip() |
|
|
|
elif l[3] == '}': |
|
|
|
sections[section] += text |
|
|
|
section = None |
|
|
|
text = [] |
|
|
|
else: |
|
|
|
raise ValueError(l) |
|
|
|
elif section: |
|
|
|
text.append(l) |
|
|
|
|
|
|
|
return {name:''.join(text) for name, text in sections.items()} |
|
|
|
|
|
|
|
class LexerAtoms: |
|
|
|
def __init__(self, lexer): |
|
|
|
assert not lexer.callback |
|
|
|
self.mres = [(p.pattern,d) for p,d in lexer.mres] |
|
|
|
self.newline_types = lexer.newline_types |
|
|
|
self.ignore_types = lexer.ignore_types |
|
|
|
|
|
|
|
def print_python(self): |
|
|
|
print('import re') |
|
|
|
print('MRES = (') |
|
|
|
pprint(self.mres) |
|
|
|
print(')') |
|
|
|
print('NEWLINE_TYPES = %s' % self.newline_types) |
|
|
|
print('IGNORE_TYPES = %s' % self.ignore_types) |
|
|
|
print('class LexerRegexps: pass') |
|
|
|
print('lexer_regexps = LexerRegexps()') |
|
|
|
print('lexer_regexps.mres = [(re.compile(p), d) for p, d in MRES]') |
|
|
|
print('lexer_regexps.callback = {}') |
|
|
|
print('lexer = _Lex(lexer_regexps)') |
|
|
|
print('def lex(stream):') |
|
|
|
print(' return lexer.lex(stream, NEWLINE_TYPES, IGNORE_TYPES)') |
|
|
|
|
|
|
|
|
|
|
|
class GetRule: |
|
|
|
def __init__(self, rule_id): |
|
|
|
self.rule_id = rule_id |
|
|
|
|
|
|
|
def __repr__(self): |
|
|
|
return 'RULE_ID[%d]' % self.rule_id |
|
|
|
|
|
|
|
|
|
|
|
def get_rule_ids(x): |
|
|
|
if isinstance(x, (tuple, list)): |
|
|
|
return type(x)(map(get_rule_ids, x)) |
|
|
|
elif isinstance(x, dict): |
|
|
|
return {get_rule_ids(k):get_rule_ids(v) for k, v in x.items()} |
|
|
|
elif isinstance(x, Rule): |
|
|
|
return GetRule(id(x)) |
|
|
|
return x |
|
|
|
|
|
|
|
class ParserAtoms: |
|
|
|
def __init__(self, parser): |
|
|
|
self.parse_table = parser.analysis.parse_table |
|
|
|
|
|
|
|
def print_python(self): |
|
|
|
print('class ParseTable: pass') |
|
|
|
print('parse_table = ParseTable()') |
|
|
|
print('parse_table.states = (') |
|
|
|
pprint(get_rule_ids(self.parse_table.states)) |
|
|
|
print(')') |
|
|
|
print('parse_table.start_state = %s' % self.parse_table.start_state) |
|
|
|
print('parse_table.end_state = %s' % self.parse_table.end_state) |
|
|
|
print('class Lark_StandAlone:') |
|
|
|
print(' def __init__(self, transformer=None):') |
|
|
|
print(' callback = parse_tree_builder.create_callback(transformer=transformer)') |
|
|
|
print(' callbacks = {rule: getattr(callback, rule.alias or rule.origin, None) for rule in RULES}') |
|
|
|
print(' self.parser = _Parser(parse_table, callbacks)') |
|
|
|
print(' def parse(self, stream):') |
|
|
|
print(' return self.parser.parse(lex(stream))') |
|
|
|
|
|
|
|
class TreeBuilderAtoms: |
|
|
|
def __init__(self, lark): |
|
|
|
self.rules = lark.rules |
|
|
|
self.ptb = lark._parse_tree_builder |
|
|
|
|
|
|
|
def print_python(self): |
|
|
|
print('RULE_ID = {') |
|
|
|
for r in self.rules: |
|
|
|
print(' %d: Rule(%r, %r, %r, %r),' % (id(r), r.origin, r.expansion, self.ptb.user_aliases[r], r.options )) |
|
|
|
print('}') |
|
|
|
print('RULES = list(RULE_ID.values())') |
|
|
|
print('parse_tree_builder = ParseTreeBuilder(RULES, Tree)') |
|
|
|
|
|
|
|
def main(fn): |
|
|
|
with codecs.open(fn, encoding='utf8') as f: |
|
|
|
lark_inst = Lark(f, parser="lalr") |
|
|
|
|
|
|
|
lexer_atoms = LexerAtoms(lark_inst.parser.lexer) |
|
|
|
parser_atoms = ParserAtoms(lark_inst.parser.parser) |
|
|
|
tree_builder_atoms = TreeBuilderAtoms(lark_inst) |
|
|
|
|
|
|
|
print('# Generated by Lark v%s' % lark.__version__) |
|
|
|
|
|
|
|
|
|
|
|
for pyfile in EXTRACT_STANDALONE_FILES: |
|
|
|
print (extract_sections(open(os.path.join(__larkdir__, pyfile)))['standalone']) |
|
|
|
|
|
|
|
print(open(os.path.join(__larkdir__, 'grammar.py')).read()) |
|
|
|
print('Shift = 0') |
|
|
|
print('Reduce = 1') |
|
|
|
lexer_atoms.print_python() |
|
|
|
tree_builder_atoms.print_python() |
|
|
|
parser_atoms.print_python() |
|
|
|
|
|
|
|
# print('print(parser.parse(lex("1+2")).pretty())') |
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
if len(sys.argv) < 2: |
|
|
|
print("Generates a stand-alone lalr parser") |
|
|
|
print("Usage: %s <grammar_path>" % sys.argv[0]) |
|
|
|
sys.exit(1) |
|
|
|
|
|
|
|
fn ,= sys.argv[1:] |
|
|
|
|
|
|
|
main(fn) |