
Added the '!' prefix to keep all tokens in a rule. Also removed tools (it's now in lark/tools)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan, 7 years ago
commit 0b902b1d03
7 changed files with 31 additions and 171 deletions
  1. examples/conf.py (+1, -1)
  2. lark/common.py (+1, -0)
  3. lark/load_grammar.py (+4, -2)
  4. lark/parse_tree_builder.py (+23, -17)
  5. lark/parser_frontends.py (+2, -1)
  6. tools/__init__.py (+0, -0)
  7. tools/nearley.py (+0, -150)
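
For context on the first half of the commit message: Lark's tree builder normally filters anonymous and underscore-prefixed tokens out of the parse tree, and the new '!' rule prefix disables that filtering so every matched token is kept. A minimal sketch of the behavior, written in current Lark syntax (%ignore postdates this commit, so the grammar below is illustrative rather than taken from this repo state):

    from lark import Lark

    grammar = r"""
        start: pair+
        !pair: NAME "=" NAME    // '!' keeps the "=" token as a child of 'pair'
        NAME: /[a-z]+/
        %ignore /\s+/
    """

    parser = Lark(grammar)
    print(parser.parse("a=b").pretty())  # 'pair' now has three children: a, =, b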

examples/conf.py (+1, -1)

@@ -34,4 +34,4 @@ a=Hello
 this="that",4
 """

-print parser.parse(sample_conf).pretty()
+print(parser.parse(sample_conf).pretty())

lark/common.py (+1, -0)

@@ -39,6 +39,7 @@ class LexerConf:

 class ParserConf:
     def __init__(self, rules, callback, start):
+        assert all(len(r)==3 for r in rules)
         self.rules = rules
         self.callback = callback
         self.start = start
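
A tiny sketch of what the new assert guards against. Note that the 3-tuple layout shown here is an assumption inferred from how rules flow into the tree builder below, not something this diff states:

    # Assumed layout: (origin, expansion, alias) -- hypothetical example data.
    rules = [('pair', ['NAME', '_EQ', 'NAME'], 'pair')]
    assert all(len(r) == 3 for r in rules)  # fails fast on malformed rule entries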

lark/load_grammar.py (+4, -2)

@@ -61,7 +61,7 @@ TOKENS = {
     '_COLON': ':',
     '_OR': r'\|',
     '_DOT': r'\.',
-    'RULE': '[_?*]?[a-z][_a-z0-9]*',
+    'RULE': '!?[_?]?[a-z][_a-z0-9]*',
     'TOKEN': '_?[A-Z][_A-Z0-9]*',
     'STRING': r'".*?[^\\]"',
     'REGEXP': r"/(?!/).*?[^\\]/",
@@ -302,6 +302,8 @@ class GrammarLoader:
                 raise GrammarError("Missing colon at line %s column %s" % (e.line, e.column))
             elif 'tokenvalue' in e.expected:
                 raise GrammarError("Expecting a value at line %s column %s" % (e.line, e.column))
+            elif e.expected == ['_OR']:
+                raise GrammarError("Newline without starting a new option (Expecting '|') at line %s column %s" % (e.line, e.column))
             raise

         # =================
@@ -363,7 +365,7 @@ class GrammarLoader:
         used_symbols = {symbol for expansions in rules.values()
                         for expansion, _alias in expansions
                         for symbol in expansion}
-        rule_set = {r.lstrip('?') for r in rules}
+        rule_set = {r.lstrip('!').lstrip('?') for r in rules}
         for sym in used_symbols:
             if is_terminal(sym):
                 if sym not in token_set:
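
A quick check of what the widened RULE pattern accepts: an optional leading '!' (keep all tokens), then the existing optional '_' (inline) or '?' (collapse single-child) markers. The rule names below are hypothetical:

    import re

    RULE = re.compile(r'!?[_?]?[a-z][_a-z0-9]*')
    for name in ['rule', '?rule', '!rule', '!?rule', '_rule', '?!rule']:
        print(name, bool(RULE.fullmatch(name)))
    # '?!rule' is rejected: '!' must come first, matching the lstrip order above.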


lark/parse_tree_builder.py (+23, -17)

@@ -12,24 +12,25 @@ def create_expand1_tree_builder_function(tree_builder):
            return tree_builder(children)
    return expand1

-def create_rule_handler(expansion, usermethod):
-    to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
-                  if not (is_terminal(sym) and sym.startswith('_'))]
-
-    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
-        def _build_ast(match):
-            children = []
-            for i, to_expand in to_include:
-                if to_expand:
-                    children += match[i].children
-                else:
-                    children.append(match[i])
+def create_rule_handler(expansion, usermethod, keep_all_tokens):
+    if not keep_all_tokens:
+        to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion)
+                      if not (is_terminal(sym) and sym.startswith('_'))]
+
+        if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
+            def _build_ast(match):
+                children = []
+                for i, to_expand in to_include:
+                    if to_expand:
+                        children += match[i].children
+                    else:
+                        children.append(match[i])

-            return usermethod(children)
-    else:
-        _build_ast = usermethod
+                return usermethod(children)
+            return _build_ast

-    return _build_ast
+    # else, if no filtering required..
+    return usermethod


 class ParseTreeBuilder:
@@ -48,6 +49,11 @@ class ParseTreeBuilder:
         callback = Callback()
         new_rules = []
         for origin, expansions in rules.items():
+            keep_all_tokens = False
+            if origin.startswith('!'):
+                origin=origin.lstrip('!')
+                keep_all_tokens = True
+
             expand1 = origin.startswith('?')
             _origin = origin.lstrip('?')

@@ -69,7 +75,7 @@ class ParseTreeBuilder:
             if expand1:
                 f = create_expand1_tree_builder_function(f)

-            alias_handler = create_rule_handler(expansion, f)
+            alias_handler = create_rule_handler(expansion, f, keep_all_tokens)

             if hasattr(callback, _alias):
                 raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))


lark/parser_frontends.py (+2, -1)

@@ -22,6 +22,7 @@ class WithLexer:
 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf):
         WithLexer.__init__(self, lexer_conf)
+        self.parser_conf = parser_conf

         analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
         analyzer.analyze()
@@ -95,7 +96,7 @@ class Earley_NoLex:
                 regexp = self.token_by_name[sym].to_regexp()
                 width = sre_parse.parse(regexp).getwidth()
                 if not width == (1,1):
-                    raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width))
+                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
                 yield sym, re.compile(regexp)
             else:
                 yield sym
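
The reworded error reflects a real constraint: Earley_NoLex matches tokens one character at a time, so every token regexp must have a fixed width of exactly 1. sre_parse, the stdlib's internal regex parser used in the code above, reports a pattern's (min, max) width:

    import sre_parse

    print(sre_parse.parse('[a-z]').getwidth())  # (1, 1) -> accepted
    print(sre_parse.parse('ab|c').getwidth())   # (1, 2) -> raises GrammarError above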


tools/__init__.py (+0, -0)


tools/nearley.py (+0, -150)

@@ -1,150 +0,0 @@
-"Converts between Lark and Nearley grammars. Work in progress!"
-
-import os.path
-import sys
-
-from lark import Lark, InlineTransformer
-
-nearley_grammar = r"""
-    start: (ruledef|directive)+
-
-    directive: "@" NAME (STRING|NAME)
-             | "@" _JS -> js_code
-    ruledef: NAME "->" expansions
-           | NAME REGEXP "->" expansions -> macro
-    expansions: expansion ("|" expansion)*
-
-    expansion: expr+ _JS?
-
-    ?expr: item [":" /[+*?]/]
-
-    ?item: rule|string|regexp
-         | "(" expansions ")"
-
-    rule: NAME
-    string: STRING
-    regexp: REGEXP
-    _JS: /(?s){%.*?%}/
-
-    NAME: /[a-zA-Z_$]\w*/
-    WS.ignore: /[\t \f\n]+/
-    COMMENT.ignore: /\#[^\n]*/
-    REGEXP: /\[.*?\]/
-    STRING: /".*?"/
-
-    """
-
-
-
-class NearleyToLark(InlineTransformer):
-    def __init__(self, builtin_path):
-        self.builtin_path = builtin_path
-
-    def rule(self, name):
-        # return {'_': '_WS?', '__':'_WS'}.get(name, name)
-        return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
-
-    def ruledef(self, name, exps):
-        name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
-        return '%s: %s' % (name, exps)
-
-    def expr(self, item, op):
-        return '(%s)%s' % (item, op)
-
-    def regexp(self, r):
-        return '/%s/' % r
-
-    def string(self, s):
-        # TODO allow regular strings, and split them in the parser frontend
-        return ' '.join('"%s"'%ch for ch in s[1:-1])
-
-    def expansion(self, *x):
-        return ' '.join(x)
-
-    def expansions(self, *x):
-        return '(%s)' % ('\n |'.join(x))
-
-    def js_code(self):
-        return ''
-
-    def macro(self, *args):
-        return ''   # TODO support macros?!
-
-    def directive(self, name, *args):
-        if name == 'builtin':
-            arg = args[0][1:-1]
-            with open(os.path.join(self.builtin_path, arg)) as f:
-                text = f.read()
-            return nearley_to_lark(text, self.builtin_path)
-        elif name == 'preprocessor':
-            return ''
-
-        raise Exception('Unknown directive: %s' % name)
-
-    def start(self, *rules):
-        return '\n'.join(filter(None, rules))
-
-def nearley_to_lark(g, builtin_path):
-    parser = Lark(nearley_grammar)
-    tree = parser.parse(g)
-    return NearleyToLark(builtin_path).transform(tree)
-
-
-def test():
-    css_example_grammar = """
-# http://www.w3.org/TR/css3-color/#colorunits
-
-@builtin "whitespace.ne"
-@builtin "number.ne"
-@builtin "postprocessors.ne"
-
-csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
-    function(d) {
-        return {
-            "r": parseInt(d[1]+d[2], 16),
-            "g": parseInt(d[3]+d[4], 16),
-            "b": parseInt(d[5]+d[6], 16),
-        }
-    }
-%}
-         | "#" hexdigit hexdigit hexdigit {%
-    function(d) {
-        return {
-            "r": parseInt(d[1]+d[1], 16),
-            "g": parseInt(d[2]+d[2], 16),
-            "b": parseInt(d[3]+d[3], 16),
-        }
-    }
-%}
-         | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
-         | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
-         | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
-         | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
-
-hexdigit -> [a-fA-F0-9]
-colnum -> unsigned_int {% id %} | percentage {%
-    function(d) {return Math.floor(d[0]*255); }
-%}
-"""
-    converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
-    print(converted_grammar)
-
-    l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
-    print(l.parse('#a199ff').pretty())
-    print(l.parse('rgb(255, 70%, 3)').pretty())
-
-
-def main():
-    try:
-        nearley_lib = sys.argv[1]
-    except IndexError:
-        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
-        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
-        return
-
-    grammar = sys.stdin.read()
-    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
-
-
-if __name__ == '__main__':
-    main()
