(inspired by issue #116)tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
- [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language)
- [fruitflies.py](fruitflies.py) - A demonstration of ambiguity
- [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter.
- [lark\_grammar.py](lark_grammar.py) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer)

### Advanced
@@ -0,0 +1,49 @@ | |||||
// Reference Lark grammar for Lark's own grammar files (.g),
// written to be parseable with the LALR(1) parser + standard lexer.

// A grammar file is a sequence of definitions and %-statements,
// separated by (possibly blank) lines.
start: (_item | _NL)*

_item: rule
     | token
     | statement

// Rule names are lowercase, token names uppercase (see RULE / TOKEN below);
// both accept an optional ".N" priority suffix before the colon.
rule: RULE priority? ":" expansions _NL
token: TOKEN priority? ":" expansions _NL

priority: "." NUMBER

statement: "%ignore" expansions _NL -> ignore
         | "%import" import_args ["->" TOKEN] _NL -> import

// Dotted path, e.g. "common.ESCAPED_STRING".
import_args: name ("." name)*

// Alternatives separated by "|"; each alternative may carry an "-> alias".
?expansions: alias (_VBAR alias)*

?alias: expansion ["->" RULE]

?expansion: expr*

// An atom with an optional repetition operator (+ * ?) or "~ n" / "~ n..m" range.
?expr: atom [OP | "~" NUMBER [".." NUMBER]]

?atom: "(" expansions ")"
     | "[" expansions "]" -> maybe
     | STRING ".." STRING -> literal_range
     | name
     | (REGEXP | STRING) -> literal

name: RULE
    | TOKEN

// A "|" may open a continuation line, so it optionally swallows the newline.
_VBAR: _NL? "|"

OP: /[+*][?]?|[?](?![a-z])/
RULE: /!?[_?]?[a-z][_a-z0-9]*/
TOKEN: /_?[A-Z][_A-Z0-9]*/
// Strings may carry an "i" suffix for case-insensitive matching.
STRING: _STRING "i"?
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/
_NL: /(\r?\n)+\s*/

%import common.ESCAPED_STRING -> _STRING
%import common.INT -> NUMBER
%import common.WS_INLINE

// Line comments run from "//" to end of line.
COMMENT: "//" /[^\n]/*

%ignore WS_INLINE
%ignore COMMENT
@@ -0,0 +1,18 @@ | |||||
"""Smoke test: parse the project's example grammars with the reference
Lark grammar (examples/lark.g), verifying it handles real-world grammars.

Run from the repository root: ``python examples/lark_grammar.py``.
"""
from lark import Lark

# Use a context manager so the grammar file handle is closed promptly
# (the original passed a bare open() and leaked the handle).
# parser="lalr" also proves the reference grammar is LALR(1)-compatible.
with open('examples/lark.g') as grammar_file:
    parser = Lark(grammar_file, parser="lalr")

# Grammar files expected to parse cleanly with the reference grammar.
grammar_files = [
    'examples/python2.g',
    'examples/python3.g',
    'examples/lark.g',
    'lark/grammars/common.g',
]


def test():
    """Parse every file in grammar_files; raises on the first failure."""
    for path in grammar_files:
        with open(path) as f:
            # Result intentionally discarded: success of parse() is the test.
            parser.parse(f.read())
    print("All grammars parsed successfully")


if __name__ == '__main__':
    test()
@@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER | |||||
// | // | ||||
// Strings | // Strings | ||||
// | // | ||||
//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ | |||||
STRING_INNER: ("\\\""|/[^"]/) | STRING_INNER: ("\\\""|/[^"]/) | ||||
ESCAPED_STRING: "\"" STRING_INNER* "\"" | ESCAPED_STRING: "\"" STRING_INNER* "\"" | ||||
@@ -122,7 +122,7 @@ RULES = { | |||||
'statement': ['ignore', 'import'], | 'statement': ['ignore', 'import'], | ||||
'ignore': ['_IGNORE expansions _NL'], | 'ignore': ['_IGNORE expansions _NL'], | ||||
'import': ['_IMPORT import_args _NL', | 'import': ['_IMPORT import_args _NL', | ||||
'_IMPORT import_args _TO TOKEN'], | |||||
'_IMPORT import_args _TO TOKEN _NL'], | |||||
'import_args': ['_import_args'], | 'import_args': ['_import_args'], | ||||
'_import_args': ['name', '_import_args _DOT name'], | '_import_args': ['name', '_import_args _DOT name'], | ||||
@@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): | |||||
return p | return p | ||||
def expansion(self, items): | def expansion(self, items): | ||||
assert items | |||||
if len(items) == 1: | if len(items) == 1: | ||||
return items[0] | return items[0] | ||||
if len({i.flags for i in items}) > 1: | if len({i.flags for i in items}) > 1: | ||||
@@ -486,6 +487,11 @@ class Grammar: | |||||
# Convert token-trees to strings/regexps | # Convert token-trees to strings/regexps | ||||
transformer = PrepareLiterals() * TokenTreeToPattern() | transformer = PrepareLiterals() * TokenTreeToPattern() | ||||
for name, (token_tree, priority) in token_defs: | |||||
for t in token_tree.find_data('expansion'): | |||||
if not t.children: | |||||
raise GrammarError("Tokens cannot be empty (%s)" % name) | |||||
tokens = [TokenDef(name, transformer.transform(token_tree), priority) | tokens = [TokenDef(name, transformer.transform(token_tree), priority) | ||||
for name, (token_tree, priority) in token_defs] | for name, (token_tree, priority) in token_defs] | ||||