@@ -0,0 +1 @@ | |||||
.python-version |
@@ -1 +1 @@ | |||||
include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.g tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* | |||||
include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* |
@@ -79,7 +79,7 @@ By the way, if you're curious what these terminals signify, they are roughly equ | |||||
Lark will accept this, if you really want to complicate your life :) | Lark will accept this, if you really want to complicate your life :) | ||||
(You can find the original definitions in [common.g](/lark/grammars/common.g).) | |||||
(You can find the original definitions in [common.lark](/lark/grammars/common.lark).) | |||||
Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. | Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. | ||||
I'll touch more on the differences between rules and terminals later. | I'll touch more on the differences between rules and terminals later. | ||||
@@ -7,6 +7,7 @@ | |||||
- [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | ||||
- [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | ||||
- [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | ||||
- [lark\_grammar.py](lark_grammar.py) + [lark.lark](lark.lark) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) | |||||
### Advanced | ### Advanced | ||||
@@ -0,0 +1,49 @@ | |||||
// Grammar describing the syntax of grammar-definition files themselves.
// Top level: any sequence of items and newlines.
start: (_item | _NL)* | |||||
// An item is a rule definition, a token definition, or a %-statement.
_item: rule | |||||
| token | |||||
| statement | |||||
// A rule: lower-case name, optional priority, ":" and its expansions, ended by a newline.
rule: RULE priority? ":" expansions _NL | |||||
// A token definition has the same shape but starts with an upper-case TOKEN name.
token: TOKEN priority? ":" expansions _NL | |||||
// Priority is written as "." followed by a number.
priority: "." NUMBER | |||||
// Statements: "%ignore <expansions>" and "%import <dotted path> [-> TOKEN]".
statement: "%ignore" expansions _NL -> ignore | |||||
| "%import" import_args ["->" TOKEN] _NL -> import | |||||
// Dotted path of names used by %import.
import_args: name ("." name)* | |||||
// One or more alternatives separated by "|" (see _VBAR below).
?expansions: alias (_VBAR alias)* | |||||
// An alternative, optionally followed by "-> name".
?alias: expansion ["->" RULE] | |||||
// A sequence of zero or more expressions.
?expansion: expr* | |||||
// An atom optionally followed by an operator, or by "~ NUMBER" with an optional ".. NUMBER"
// (presumably a repetition count / range -- confirm against the grammar loader).
?expr: atom [OP | "~" NUMBER [".." NUMBER]] | |||||
// Atoms: parenthesised group, bracketed group (maybe), string range, name, or a literal.
?atom: "(" expansions ")" | |||||
| "[" expansions "]" -> maybe | |||||
| STRING ".." STRING -> literal_range | |||||
| name | |||||
| (REGEXP | STRING) -> literal | |||||
// A name is either a rule name or a token name.
name: RULE | |||||
| TOKEN | |||||
// The alternative separator may be preceded by a newline, so alternatives can span lines.
_VBAR: _NL? "|" | |||||
// "+" or "*" optionally followed by "?", or a lone "?" not followed by a lower-case letter
// (presumably so it is not confused with the "?" prefix allowed by RULE -- confirm).
OP: /[+*][?]?|[?](?![a-z])/ | |||||
// Rule name: optional "!", optional "_" or "?", then a lower-case letter and lower-case/digit/underscore chars.
RULE: /!?[_?]?[a-z][_a-z0-9]*/ | |||||
// Token name: optional "_", then an upper-case letter and upper-case/digit/underscore chars.
TOKEN: /_?[A-Z][_A-Z0-9]*/ | |||||
// A quoted string with an optional trailing "i".
STRING: _STRING "i"? | |||||
// Slash-delimited regexp: may not begin with "//", may contain escaped slashes/backslashes,
// may not contain a raw newline, and may be followed by flag letters from "imslux".
REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ | |||||
// One or more line breaks followed by any amount of whitespace.
_NL: /(\r?\n)+\s*/ | |||||
// Terminals taken from the "common" grammar, two of them renamed on import.
%import common.ESCAPED_STRING -> _STRING | |||||
%import common.INT -> NUMBER | |||||
%import common.WS_INLINE | |||||
// A comment runs from "//" up to (not including) the next newline.
COMMENT: "//" /[^\n]/* | |||||
// Inline whitespace and comments are ignored.
%ignore WS_INLINE | |||||
%ignore COMMENT |
@@ -0,0 +1,18 @@ | |||||
from lark import Lark | |||||
# Build an LALR parser from the reference grammar file. The file is opened
# in a ``with`` block so its handle is closed once the parser has been
# constructed, instead of being left open for the garbage collector.
# NOTE(review): the path is relative, so the script presumably has to be run
# from the repository root -- confirm.
with open('examples/lark.lark') as grammar_source:
    parser = Lark(grammar_source, parser="lalr")

# Grammar files the parser above is expected to accept. Paths are relative
# to the current working directory.
grammar_files = [
    'examples/python2.lark',
    'examples/python3.lark',
    'examples/lark.lark',
    'lark/grammars/common.lark',
]
def test():
    """Parse every file in ``grammar_files`` with the module-level ``parser``.

    Each file is read in full and passed to ``parser.parse``. Any exception
    raised while opening, reading or parsing a file propagates to the caller.
    A confirmation line is printed once every file has been parsed.
    """
    for grammar_file in grammar_files:
        # Close each grammar file promptly instead of leaking the handle.
        with open(grammar_file) as f:
            text = f.read()
        # The parse tree itself is not needed; parsing without error is the check.
        parser.parse(text)
    print("All grammars parsed successfully")
if __name__ == '__main__':
    # Executed as a script (not imported): run the grammar check.
    test()
@@ -22,10 +22,9 @@ class PythonIndenter(Indenter): | |||||
kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') | kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') | ||||
python_parser2 = Lark.open('python2.g', parser='lalr', **kwargs) | |||||
python_parser3 = Lark.open('python3.g',parser='lalr', **kwargs) | |||||
python_parser2_earley = Lark.open('python2.g', parser='earley', lexer='standard', **kwargs) | |||||
print(python_parser3) | |||||
python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) | |||||
python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) | |||||
python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='standard', **kwargs) | |||||
def _read(fn, *args): | def _read(fn, *args): | ||||
@@ -1 +1 @@ | |||||
python -m lark.tools.standalone json.g > json_parser.py | |||||
python -m lark.tools.standalone json.lark > json_parser.py |
@@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER | |||||
// | // | ||||
// Strings | // Strings | ||||
// | // | ||||
//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ | |||||
STRING_INNER: ("\\\""|/[^"]/) | STRING_INNER: ("\\\""|/[^"]/) | ||||
ESCAPED_STRING: "\"" STRING_INNER* "\"" | ESCAPED_STRING: "\"" STRING_INNER* "\"" | ||||
@@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): | |||||
return p | return p | ||||
def expansion(self, items): | def expansion(self, items): | ||||
assert items | |||||
if len(items) == 1: | if len(items) == 1: | ||||
return items[0] | return items[0] | ||||
if len({i.flags for i in items}) > 1: | if len({i.flags for i in items}) > 1: | ||||
@@ -611,7 +612,7 @@ class GrammarLoader: | |||||
elif stmt.data == 'import': | elif stmt.data == 'import': | ||||
dotted_path = stmt.children[0].children | dotted_path = stmt.children[0].children | ||||
name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] | name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] | ||||
grammar_path = os.path.join(*dotted_path[:-1]) + '.g' | |||||
grammar_path = os.path.join(*dotted_path[:-1]) + '.lark' | |||||
g = import_grammar(grammar_path) | g = import_grammar(grammar_path) | ||||
token_options = dict(g.token_defs)[dotted_path[-1]] | token_options = dict(g.token_defs)[dotted_path[-1]] | ||||
assert isinstance(token_options, tuple) and len(token_options)==2 | assert isinstance(token_options, tuple) and len(token_options)==2 | ||||
@@ -11,7 +11,7 @@ setup( | |||||
requires = [], | requires = [], | ||||
install_requires = [], | install_requires = [], | ||||
package_data = { '': ['*.md', '*.g'] }, | |||||
package_data = { '': ['*.md', '*.lark'] }, | |||||
test_suite = 'tests.__main__', | test_suite = 'tests.__main__', | ||||