@@ -34,4 +34,4 @@ a=Hello | |||
this="that",4 | |||
""" | |||
print parser.parse(sample_conf).pretty() | |||
print(parser.parse(sample_conf).pretty()) |
@@ -39,6 +39,7 @@ class LexerConf: | |||
class ParserConf:
    """Bundles everything a parser backend needs: the grammar rules,
    the tree-building callback object, and the start symbol.
    """

    def __init__(self, rules, callback, start):
        # Each rule must be a 3-tuple (shape enforced up front so a
        # malformed rule fails here rather than deep inside a parser).
        for rule in rules:
            assert len(rule) == 3
        self.rules = rules
        self.callback = callback
        self.start = start
@@ -61,7 +61,7 @@ TOKENS = { | |||
'_COLON': ':', | |||
'_OR': r'\|', | |||
'_DOT': r'\.', | |||
'RULE': '[_?*]?[a-z][_a-z0-9]*', | |||
'RULE': '!?[_?]?[a-z][_a-z0-9]*', | |||
'TOKEN': '_?[A-Z][_A-Z0-9]*', | |||
'STRING': r'".*?[^\\]"', | |||
'REGEXP': r"/(?!/).*?[^\\]/", | |||
@@ -302,6 +302,8 @@ class GrammarLoader: | |||
raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column)) | |||
elif 'tokenvalue' in e.expected: | |||
raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column)) | |||
elif e.expected == ['_OR']: | |||
raise GrammarError("Newline without starting a new option (Expecting '|') at line %s column %s" % (e.line, e.column)) | |||
raise | |||
# ================= | |||
@@ -363,7 +365,7 @@ class GrammarLoader: | |||
used_symbols = {symbol for expansions in rules.values() | |||
for expansion, _alias in expansions | |||
for symbol in expansion} | |||
rule_set = {r.lstrip('?') for r in rules} | |||
rule_set = {r.lstrip('!').lstrip('?') for r in rules} | |||
for sym in used_symbols: | |||
if is_terminal(sym): | |||
if sym not in token_set: | |||
@@ -12,24 +12,25 @@ def create_expand1_tree_builder_function(tree_builder): | |||
return tree_builder(children) | |||
return expand1 | |||
def create_rule_handler(expansion, usermethod, keep_all_tokens):
    """Return a callable that builds the tree node for one rule expansion.

    expansion: list of symbol names forming the rule's right-hand side.
    usermethod: callback invoked with the (possibly filtered) children.
    keep_all_tokens: when True, skip filtering entirely and pass every
        matched symbol through to *usermethod* unchanged.

    NOTE(review): this span of the patch interleaved the pre- and
    post-change versions of the function; this is the post-change
    (3-argument) version, which the call site in ParseTreeBuilder uses.
    """
    if not keep_all_tokens:
        # Keep a symbol unless it is a terminal whose name starts with '_'
        # (anonymous/filtered tokens).  For each kept symbol remember
        # whether it should be inlined: non-terminals starting with '_'
        # contribute their children instead of themselves.
        to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
                      if not (is_terminal(sym) and sym.startswith('_'))]

        if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
            def _build_ast(match):
                children = []
                for i, to_expand in to_include:
                    if to_expand:
                        # Inline this child's own children ('_rule' convention).
                        children += match[i].children
                    else:
                        children.append(match[i])
                return usermethod(children)
            return _build_ast

    # else, if no filtering required..
    return usermethod
class ParseTreeBuilder: | |||
@@ -48,6 +49,11 @@ class ParseTreeBuilder: | |||
callback = Callback() | |||
new_rules = [] | |||
for origin, expansions in rules.items(): | |||
keep_all_tokens = False | |||
if origin.startswith('!'): | |||
origin=origin.lstrip('!') | |||
keep_all_tokens = True | |||
expand1 = origin.startswith('?') | |||
_origin = origin.lstrip('?') | |||
@@ -69,7 +75,7 @@ class ParseTreeBuilder: | |||
if expand1: | |||
f = create_expand1_tree_builder_function(f) | |||
alias_handler = create_rule_handler(expansion, f) | |||
alias_handler = create_rule_handler(expansion, f, keep_all_tokens) | |||
if hasattr(callback, _alias): | |||
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin)) | |||
@@ -22,6 +22,7 @@ class WithLexer: | |||
class LALR(WithLexer): | |||
def __init__(self, lexer_conf, parser_conf): | |||
WithLexer.__init__(self, lexer_conf) | |||
self.parser_conf = parser_conf | |||
analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start) | |||
analyzer.analyze() | |||
@@ -95,7 +96,7 @@ class Earley_NoLex: | |||
regexp = self.token_by_name[sym].to_regexp() | |||
width = sre_parse.parse(regexp).getwidth() | |||
if not width == (1,1): | |||
raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width)) | |||
raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width)) | |||
yield sym, re.compile(regexp) | |||
else: | |||
yield sym | |||
@@ -1,150 +0,0 @@ | |||
"Converts between Lark and Nearley grammars. Work in progress!" | |||
import os.path | |||
import sys | |||
from lark import Lark, InlineTransformer | |||
# Lark grammar describing Nearley's own .ne grammar format; parsed by
# nearley_to_lark() to drive the conversion.  _JS matches embedded
# JavaScript postprocessor blocks ({% ... %}); the .ignore suffix on WS
# and COMMENT presumably marks tokens to be skipped -- TODO confirm
# against this lark version's token-naming convention.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ _JS?
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
class NearleyToLark(InlineTransformer):
    """Transforms a parsed Nearley grammar tree into Lark grammar text.

    Each method corresponds to a rule of `nearley_grammar` and returns
    the equivalent fragment of Lark source as a string.
    """

    def __init__(self, builtin_path):
        # Directory holding Nearley's builtin *.ne grammars, used to
        # resolve @builtin directives.
        self.builtin_path = builtin_path

    def rule(self, name):
        # Nearley's builtin whitespace rules map onto our own helpers.
        return {'_': '_ws_maybe', '__': '_ws'}.get(name, name)

    def ruledef(self, name, exps):
        renamed = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
        return '%s: %s' % (renamed, exps)

    def expr(self, item, op):
        return '(%s)%s' % (item, op)

    def regexp(self, r):
        return '/%s/' % r

    def string(self, s):
        # TODO allow regular strings, and split them in the parser frontend
        chars = s[1:-1]
        return ' '.join('"%s"' % ch for ch in chars)

    def expansion(self, *x):
        return ' '.join(x)

    def expansions(self, *x):
        return '(%s)' % ('\n |'.join(x))

    def js_code(self):
        # Embedded JavaScript postprocessors have no Lark equivalent.
        return ''

    def macro(self, *args):
        return ''  # TODO support macros?!

    def directive(self, name, *args):
        if name == 'builtin':
            # Inline the referenced builtin grammar, converted recursively.
            filename = args[0][1:-1]
            with open(os.path.join(self.builtin_path, filename)) as f:
                text = f.read()
            return nearley_to_lark(text, self.builtin_path)
        elif name == 'preprocessor':
            return ''
        raise Exception('Unknown directive: %s' % name)

    def start(self, *rules):
        return '\n'.join(filter(None, rules))
def nearley_to_lark(g, builtin_path):
    """Parse Nearley grammar text *g* and return it converted to Lark
    grammar text; *builtin_path* resolves @builtin directives."""
    tree = Lark(nearley_grammar).parse(g)
    return NearleyToLark(builtin_path).transform(tree)
def test():
    """Manual smoke test: converts a CSS-color grammar from Nearley to
    Lark and parses a few color literals with the result.

    NOTE(review): relies on a hard-coded local path to the Nearley
    builtin grammars ('/home/erez/nearley/builtin') -- adjust to your
    checkout before running.
    """
    # Example grammar taken from the Nearley docs (CSS color units).
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits
@builtin "whitespace.ne"
@builtin "number.ne"
@builtin "postprocessors.ne"
csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[2], 16),
"g": parseInt(d[3]+d[4], 16),
"b": parseInt(d[5]+d[6], 16),
}
}
%}
| "#" hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[1], 16),
"g": parseInt(d[2]+d[2], 16),
"b": parseInt(d[3]+d[3], 16),
}
}
%}
| "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
| "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
| "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
| "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
hexdigit -> [a-fA-F0-9]
colnum -> unsigned_int {% id %} | percentage {%
function(d) {return Math.floor(d[0]*255); }
%}
"""
    converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
    print(converted_grammar)

    # 'earley_nolex' is required: the converted grammar leans on dynamic lexing.
    l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
    print(l.parse('#a199ff').pretty())
    print(l.parse('rgb(255, 70%, 3)').pretty())
def main():
    """Command-line entry point.

    Reads a Nearley grammar from stdin and prints the converted Lark
    grammar.  The single positional argument is the path to the Nearley
    library checkout; its 'builtin' subdirectory is used to resolve
    @builtin directives.
    """
    if len(sys.argv) < 2:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
        return
    nearley_lib = sys.argv[1]

    grammar = sys.stdin.read()
    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))


if __name__ == '__main__':
    main()