| @@ -34,4 +34,4 @@ a=Hello | |||
| this="that",4 | |||
| """ | |||
| print parser.parse(sample_conf).pretty() | |||
| print(parser.parse(sample_conf).pretty()) | |||
| @@ -39,6 +39,7 @@ class LexerConf: | |||
| class ParserConf: | |||
| def __init__(self, rules, callback, start): | |||
| assert all(len(r)==3 for r in rules) | |||
| self.rules = rules | |||
| self.callback = callback | |||
| self.start = start | |||
| @@ -61,7 +61,7 @@ TOKENS = { | |||
| '_COLON': ':', | |||
| '_OR': r'\|', | |||
| '_DOT': r'\.', | |||
| 'RULE': '[_?*]?[a-z][_a-z0-9]*', | |||
| 'RULE': '!?[_?]?[a-z][_a-z0-9]*', | |||
| 'TOKEN': '_?[A-Z][_A-Z0-9]*', | |||
| 'STRING': r'".*?[^\\]"', | |||
| 'REGEXP': r"/(?!/).*?[^\\]/", | |||
| @@ -302,6 +302,8 @@ class GrammarLoader: | |||
| raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column)) | |||
| elif 'tokenvalue' in e.expected: | |||
| raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column)) | |||
| elif e.expected == ['_OR']: | |||
| raise GrammarError("Newline without starting a new option (Expecting '|') at line %s column %s" % (e.line, e.column)) | |||
| raise | |||
| # ================= | |||
| @@ -363,7 +365,7 @@ class GrammarLoader: | |||
| used_symbols = {symbol for expansions in rules.values() | |||
| for expansion, _alias in expansions | |||
| for symbol in expansion} | |||
| rule_set = {r.lstrip('?') for r in rules} | |||
| rule_set = {r.lstrip('!').lstrip('?') for r in rules} | |||
| for sym in used_symbols: | |||
| if is_terminal(sym): | |||
| if sym not in token_set: | |||
| @@ -12,24 +12,25 @@ def create_expand1_tree_builder_function(tree_builder): | |||
| return tree_builder(children) | |||
| return expand1 | |||
def create_rule_handler(expansion, usermethod, keep_all_tokens=False):
    """Wrap *usermethod* so it is called with the filtered children of a match.

    Parameters:
        expansion: sequence of symbol names making up one rule expansion.
        usermethod: callable invoked with the list of children to keep.
        keep_all_tokens: when true, no filtering or inlining is applied and
            *usermethod* is returned unchanged. Defaults to False so that
            two-argument callers keep working.

    Returns:
        Either *usermethod* itself (nothing to filter) or a closure that
        builds the filtered child list from a match and passes it on.
    """
    if keep_all_tokens:
        return usermethod

    # Keep every symbol except terminals whose name starts with '_'.
    # The flag records whether a kept symbol starts with '_' (which, after
    # the filter, can only be a non-terminal) and must be expanded in place.
    to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
                  if not (is_terminal(sym) and sym.startswith('_'))]

    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
        def _build_ast(match):
            children = []
            for i, to_expand in to_include:
                if to_expand:
                    # Splice the sub-tree's children directly into this node.
                    children += match[i].children
                else:
                    children.append(match[i])
            return usermethod(children)
        return _build_ast

    # else, if no filtering required..
    return usermethod
| class ParseTreeBuilder: | |||
| @@ -48,6 +49,11 @@ class ParseTreeBuilder: | |||
| callback = Callback() | |||
| new_rules = [] | |||
| for origin, expansions in rules.items(): | |||
| keep_all_tokens = False | |||
| if origin.startswith('!'): | |||
| origin=origin.lstrip('!') | |||
| keep_all_tokens = True | |||
| expand1 = origin.startswith('?') | |||
| _origin = origin.lstrip('?') | |||
| @@ -69,7 +75,7 @@ class ParseTreeBuilder: | |||
| if expand1: | |||
| f = create_expand1_tree_builder_function(f) | |||
| alias_handler = create_rule_handler(expansion, f) | |||
| alias_handler = create_rule_handler(expansion, f, keep_all_tokens) | |||
| if hasattr(callback, _alias): | |||
| raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin)) | |||
| @@ -22,6 +22,7 @@ class WithLexer: | |||
| class LALR(WithLexer): | |||
| def __init__(self, lexer_conf, parser_conf): | |||
| WithLexer.__init__(self, lexer_conf) | |||
| self.parser_conf = parser_conf | |||
| analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) | |||
| analyzer.analyze() | |||
| @@ -95,7 +96,7 @@ class Earley_NoLex: | |||
| regexp = self.token_by_name[sym].to_regexp() | |||
| width = sre_parse.parse(regexp).getwidth() | |||
| if not width == (1,1): | |||
| raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width)) | |||
| raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) | |||
| yield sym, re.compile(regexp) | |||
| else: | |||
| yield sym | |||
| @@ -1,150 +0,0 @@ | |||
| "Converts between Lark and Nearley grammars. Work in progress!" | |||
| import os.path | |||
| import sys | |||
| from lark import Lark, InlineTransformer | |||
# Lark grammar describing the Nearley grammar syntax. nearley_to_lark() feeds
# it to Lark(), and NearleyToLark handles the resulting tree node names
# (start, directive, js_code, ruledef, macro, expansions, expansion, expr,
# rule, string, regexp).
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ _JS?
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
class NearleyToLark(InlineTransformer):
    """Render a parsed Nearley grammar tree as Lark grammar text.

    Every handler returns a string fragment for the tree node of the same
    name; ``start`` joins the non-empty fragments with newlines.
    """

    # The rule names '_' and '__' are renamed on output.
    _WS_ALIASES = {'_': '_ws_maybe', '__': '_ws'}

    def __init__(self, builtin_path):
        # Directory that '@builtin' file names are resolved against.
        self.builtin_path = builtin_path

    def rule(self, name):
        """Return the output name for a rule reference."""
        return self._WS_ALIASES.get(name, name)

    def ruledef(self, name, exps):
        """Return a ``name: expansions`` definition line."""
        target = self._WS_ALIASES.get(name, name)
        return '%s: %s' % (target, exps)

    def expr(self, item, op):
        """Return the item wrapped in parentheses, followed by its operator."""
        return '(%s)%s' % (item, op)

    def regexp(self, r):
        """Return the pattern wrapped in slashes."""
        return '/%s/' % r

    def string(self, s):
        """Return the quoted string as one quoted literal per character."""
        # TODO allow regular strings, and split them in the parser frontend
        unquoted = s[1:-1]
        pieces = ['"%s"' % ch for ch in unquoted]
        return ' '.join(pieces)

    def expansion(self, *x):
        """Join the items of one alternative with spaces."""
        return ' '.join(x)

    def expansions(self, *x):
        """Join the alternatives with '|' inside a parenthesised group."""
        alternatives = '\n |'.join(x)
        return '(%s)' % (alternatives)

    def js_code(self):
        """Emit nothing for a JavaScript code directive."""
        return ''

    def macro(self, *args):
        """Emit nothing for a macro definition."""
        return ''   # TODO support macros?!

    def directive(self, name, *args):
        """Handle an '@' directive.

        'builtin' reads the named file from ``builtin_path`` and converts it;
        'preprocessor' emits nothing; any other name raises ``Exception``.
        """
        if name == 'preprocessor':
            return ''
        if name != 'builtin':
            raise Exception('Unknown directive: %s' % name)

        # Strip the surrounding quote characters from the file name argument.
        filename = args[0][1:-1]
        with open(os.path.join(self.builtin_path, filename)) as f:
            text = f.read()
            return nearley_to_lark(text, self.builtin_path)

    def start(self, *rules):
        """Join all non-empty top-level fragments, one per line."""
        return '\n'.join(r for r in rules if r)
def nearley_to_lark(g, builtin_path):
    """Convert Nearley grammar text *g* into Lark grammar text.

    *builtin_path* is handed to NearleyToLark, which uses it to locate files
    named by '@builtin' directives.
    """
    tree = Lark(nearley_grammar).parse(g)
    converter = NearleyToLark(builtin_path)
    return converter.transform(tree)
def test(builtin_path='/home/erez/nearley/builtin'):
    """Convert a sample Nearley CSS-colour grammar and parse two inputs with it.

    Prints the converted Lark grammar, then the parse trees for a hex colour
    and an ``rgb(...)`` colour.

    Parameters:
        builtin_path: directory holding the Nearley builtin grammars named by
            the sample's '@builtin' directives. The default is the original
            hard-coded location; pass your own checkout path to run this
            elsewhere.
    """
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits
@builtin "whitespace.ne"
@builtin "number.ne"
@builtin "postprocessors.ne"
csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[2], 16),
"g": parseInt(d[3]+d[4], 16),
"b": parseInt(d[5]+d[6], 16),
}
}
%}
| "#" hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[1], 16),
"g": parseInt(d[2]+d[2], 16),
"b": parseInt(d[3]+d[3], 16),
}
}
%}
| "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
| "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
| "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
| "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
hexdigit -> [a-fA-F0-9]
colnum -> unsigned_int {% id %} | percentage {%
function(d) {return Math.floor(d[0]*255); }
%}
"""
    converted_grammar = nearley_to_lark(css_example_grammar, builtin_path)
    print(converted_grammar)

    # The converted grammar is parsed with the 'earley_nolex' parser option.
    parser = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
    print(parser.parse('#a199ff').pretty())
    print(parser.parse('rgb(255, 70%, 3)').pretty())
def main():
    """Command-line entry point: Nearley grammar on stdin, Lark grammar on stdout.

    The first command-line argument is the Nearley library path; its
    'builtin' sub-directory is used to resolve builtin grammars. When the
    argument is missing, a usage message is printed and the function returns.
    """
    if len(sys.argv) < 2:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
        return

    nearley_lib = sys.argv[1]
    source = sys.stdin.read()
    builtin_dir = os.path.join(nearley_lib, 'builtin')
    print(nearley_to_lark(source, builtin_dir))


if __name__ == '__main__':
    main()