@@ -34,4 +34,4 @@ a=Hello
 this="that",4
 """
-print parser.parse(sample_conf).pretty()
+print(parser.parse(sample_conf).pretty())
@@ -39,6 +39,7 @@ class LexerConf:

 class ParserConf:
     def __init__(self, rules, callback, start):
+        assert all(len(r) == 3 for r in rules)
         self.rules = rules
         self.callback = callback
         self.start = start
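
The new assertion documents the shape ParserConf expects: every rule is a 3-tuple. A minimal sketch of what satisfies it -- the exact fields (origin, expansion, alias) are an assumption drawn from how the loader builds rules, not a confirmed API:

    # hypothetical rule list; each entry assumed to be (origin, expansion, alias)
    rules = [
        ('start', ['NAME', '_COLON', 'value'], None),
        ('value', ['STRING'], 'string_value'),
    ]
    assert all(len(r) == 3 for r in rules)   # the check added above
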
@@ -61,7 +61,7 @@ TOKENS = {
     '_COLON': ':',
     '_OR': r'\|',
     '_DOT': r'\.',
-    'RULE': '[_?*]?[a-z][_a-z0-9]*',
+    'RULE': '!?[_?]?[a-z][_a-z0-9]*',
     'TOKEN': '_?[A-Z][_A-Z0-9]*',
     'STRING': r'".*?[^\\]"',
     'REGEXP': r"/(?!/).*?[^\\]/",
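
The widened RULE pattern accepts an optional leading '!' before the existing '_'/'?' modifier, and drops the unused '*'. A quick sanity check of what lexes as a rule name under the new pattern:

    import re

    RULE = re.compile(r'!?[_?]?[a-z][_a-z0-9]*')
    for name in ['expr', '?expr', '!expr', '!?expr', '*expr']:
        print(name, bool(RULE.fullmatch(name)))
    # '!expr' and '!?expr' now match; '*expr' no longer does
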
@@ -302,6 +302,8 @@ class GrammarLoader:
                 raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column))
             elif 'tokenvalue' in e.expected:
                 raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column))
+            elif e.expected == ['_OR']:
+                raise GrammarError("Newline without starting a new option (Expecting '|') at line %s column %s" % (e.line, e.column))
             raise

         # =================
@@ -363,7 +365,7 @@ class GrammarLoader:
         used_symbols = {symbol for expansions in rules.values()
                                for expansion, _alias in expansions
                                for symbol in expansion}
-        rule_set = {r.lstrip('?') for r in rules}
+        rule_set = {r.lstrip('!').lstrip('?') for r in rules}
         for sym in used_symbols:
             if is_terminal(sym):
                 if sym not in token_set:
@@ -12,24 +12,25 @@ def create_expand1_tree_builder_function(tree_builder):
         return tree_builder(children)
     return expand1

-def create_rule_handler(expansion, usermethod):
-    to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
-                  if not (is_terminal(sym) and sym.startswith('_'))]
-
-    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
-        def _build_ast(match):
-            children = []
-            for i, to_expand in to_include:
-                if to_expand:
-                    children += match[i].children
-                else:
-                    children.append(match[i])
-            return usermethod(children)
-    else:
-        _build_ast = usermethod
-    return _build_ast
+def create_rule_handler(expansion, usermethod, keep_all_tokens):
+    if not keep_all_tokens:
+        to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
+                      if not (is_terminal(sym) and sym.startswith('_'))]
+
+        if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
+            def _build_ast(match):
+                children = []
+                for i, to_expand in to_include:
+                    if to_expand:
+                        children += match[i].children
+                    else:
+                        children.append(match[i])
+                return usermethod(children)
+            return _build_ast
+
+    # else, if no filtering required..
+    return usermethod

 class ParseTreeBuilder:
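
For context, this is the filtering that the new keep_all_tokens flag bypasses: underscored terminals are dropped, and matches for underscored rules are inlined into their parent. A standalone sketch of that logic, using stand-ins for lark's Tree and is_terminal:

    is_terminal = lambda sym: sym.isupper()   # stand-in: terminals are uppercase

    class Tree:                               # stand-in for lark.Tree
        def __init__(self, data, children):
            self.data, self.children = data, children

    expansion = ['NAME', '_COLON', '_items']
    to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
                  if not (is_terminal(sym) and sym.startswith('_'))]
    # [(0, False), (2, True)] -- '_COLON' is dropped, '_items' will be inlined

    match = ['name_tok', 'colon_tok', Tree('_items', ['a', 'b'])]
    children = []
    for i, to_expand in to_include:
        if to_expand:
            children += match[i].children
        else:
            children.append(match[i])
    print(children)   # ['name_tok', 'a', 'b']; with keep_all_tokens the raw match is kept
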
@@ -48,6 +49,11 @@ class ParseTreeBuilder:
         callback = Callback()
         new_rules = []
         for origin, expansions in rules.items():
+            keep_all_tokens = False
+            if origin.startswith('!'):
+                origin = origin.lstrip('!')
+                keep_all_tokens = True
+
             expand1 = origin.startswith('?')
             _origin = origin.lstrip('?')
@@ -69,7 +75,7 @@ class ParseTreeBuilder:
                 if expand1:
                     f = create_expand1_tree_builder_function(f)

-                alias_handler = create_rule_handler(expansion, f)
+                alias_handler = create_rule_handler(expansion, f, keep_all_tokens)

                 if hasattr(callback, _alias):
                     raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
@@ -22,6 +22,7 @@ class WithLexer:

 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf):
         WithLexer.__init__(self, lexer_conf)
+        self.parser_conf = parser_conf

         analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
         analyzer.analyze()
@@ -95,7 +96,7 @@ class Earley_NoLex:
                 regexp = self.token_by_name[sym].to_regexp()
                 width = sre_parse.parse(regexp).getwidth()
                 if not width == (1,1):
-                    raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width))
+                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                 yield sym, re.compile(regexp)
             else:
                 yield sym
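
The width check leans on sre_parse reporting each pattern's (min, max) match length; dynamic lexing scans one character at a time, so both bounds must be 1. For illustration:

    import sre_parse

    print(sre_parse.parse(r'[a-z]').getwidth())    # (1, 1)  -> accepted
    print(sre_parse.parse(r'[a-z]+').getwidth())   # (1, MAXREPEAT) -> rejected
    print(sre_parse.parse(r'ab').getwidth())       # (2, 2)  -> rejected
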
@@ -1,150 +0,0 @@
-"Converts between Lark and Nearley grammars. Work in progress!"
-
-import os.path
-import sys
-
-from lark import Lark, InlineTransformer
-
-nearley_grammar = r"""
-    start: (ruledef|directive)+
-
-    directive: "@" NAME (STRING|NAME)
-             | "@" _JS  -> js_code
-    ruledef: NAME "->" expansions
-           | NAME REGEXP "->" expansions -> macro
-    expansions: expansion ("|" expansion)*
-
-    expansion: expr+ _JS?
-
-    ?expr: item [":" /[+*?]/]
-
-    ?item: rule|string|regexp
-         | "(" expansions ")"
-
-    rule: NAME
-    string: STRING
-    regexp: REGEXP
-
-    _JS: /(?s){%.*?%}/
-    NAME: /[a-zA-Z_$]\w*/
-    WS.ignore: /[\t \f\n]+/
-    COMMENT.ignore: /\#[^\n]*/
-    REGEXP: /\[.*?\]/
-    STRING: /".*?"/
-"""
-
-class NearleyToLark(InlineTransformer):
-    def __init__(self, builtin_path):
-        self.builtin_path = builtin_path
-
-    def rule(self, name):
-        # return {'_': '_WS?', '__':'_WS'}.get(name, name)
-        return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
-
-    def ruledef(self, name, exps):
-        name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
-        return '%s: %s' % (name, exps)
-
-    def expr(self, item, op):
-        return '(%s)%s' % (item, op)
-
-    def regexp(self, r):
-        return '/%s/' % r
-
-    def string(self, s):
-        # TODO allow regular strings, and split them in the parser frontend
-        return ' '.join('"%s"'%ch for ch in s[1:-1])
-
-    def expansion(self, *x):
-        return ' '.join(x)
-
-    def expansions(self, *x):
-        return '(%s)' % ('\n |'.join(x))
-
-    def js_code(self):
-        return ''
-
-    def macro(self, *args):
-        return ''   # TODO support macros?!
-
-    def directive(self, name, *args):
-        if name == 'builtin':
-            arg = args[0][1:-1]
-            with open(os.path.join(self.builtin_path, arg)) as f:
-                text = f.read()
-            return nearley_to_lark(text, self.builtin_path)
-        elif name == 'preprocessor':
-            return ''
-        raise Exception('Unknown directive: %s' % name)
-
-    def start(self, *rules):
-        return '\n'.join(filter(None, rules))
-
-def nearley_to_lark(g, builtin_path):
-    parser = Lark(nearley_grammar)
-    tree = parser.parse(g)
-    return NearleyToLark(builtin_path).transform(tree)
-
-def test():
-    css_example_grammar = """
-# http://www.w3.org/TR/css3-color/#colorunits
-@builtin "whitespace.ne"
-@builtin "number.ne"
-@builtin "postprocessors.ne"
-
-csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
-    function(d) {
-        return {
-            "r": parseInt(d[1]+d[2], 16),
-            "g": parseInt(d[3]+d[4], 16),
-            "b": parseInt(d[5]+d[6], 16),
-        }
-    }
-%}
-          | "#" hexdigit hexdigit hexdigit {%
-    function(d) {
-        return {
-            "r": parseInt(d[1]+d[1], 16),
-            "g": parseInt(d[2]+d[2], 16),
-            "b": parseInt(d[3]+d[3], 16),
-        }
-    }
-%}
-          | "rgb"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
-          | "hsl"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
-          | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
-          | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
-
-hexdigit -> [a-fA-F0-9]
-
-colnum -> unsigned_int {% id %} | percentage {%
-    function(d) {return Math.floor(d[0]*255); }
-%}
-"""
-    converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
-    print(converted_grammar)
-
-    l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
-    print(l.parse('#a199ff').pretty())
-    print(l.parse('rgb(255, 70%, 3)').pretty())
-
-def main():
-    try:
-        nearley_lib = sys.argv[1]
-    except IndexError:
-        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
-        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
-        return
-
-    grammar = sys.stdin.read()
-    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
-
-if __name__ == '__main__':
-    main()