| @@ -1,225 +0,0 @@ | |||
| // Python 3 grammar for Lark | |||
| // This grammar should parse all python 3.x code successfully. | |||
| // Adapted from: https://docs.python.org/3/reference/grammar.html | |||
| // Adapted by: Erez Shinan | |||
| // Start symbols for the grammar: | |||
| // single_input is a single interactive statement; | |||
| // file_input is a module or sequence of commands read from an input file; | |||
| // eval_input is the input for the eval() functions. | |||
| // NB: compound_stmt in single_input is followed by extra NEWLINE! | |||
| single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE | |||
| file_input: (_NEWLINE | stmt)* | |||
| eval_input: testlist _NEWLINE* | |||
| decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE | |||
| decorators: decorator+ | |||
| decorated: decorators (classdef | funcdef | async_funcdef) | |||
| async_funcdef: "async" funcdef | |||
| funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite | |||
| parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]] | |||
| | starparams | |||
| | kwparams | |||
| SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result | |||
| starparams: "*" typedparam? ("," paramvalue)* ["," kwparams] | |||
| kwparams: "**" typedparam ","? | |||
| ?paramvalue: typedparam ("=" test)? | |||
| ?typedparam: NAME (":" test)? | |||
| lambdef: "lambda" [lambda_params] ":" test | |||
| lambdef_nocond: "lambda" [lambda_params] ":" test_nocond | |||
| lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] | |||
| | lambda_starparams | |||
| | lambda_kwparams | |||
| ?lambda_paramvalue: NAME ("=" test)? | |||
| lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]] | |||
| lambda_kwparams: "**" NAME ","? | |||
| ?stmt: simple_stmt | compound_stmt | |||
| ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE | |||
| ?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) | |||
| expr_stmt: testlist_star_expr | |||
| assign_stmt: annassign | augassign | assign | |||
| annassign: testlist_star_expr ":" test ["=" test] | |||
| assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ | |||
| augassign: testlist_star_expr augassign_op (yield_expr|testlist) | |||
| !augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" | |||
| ?testlist_star_expr: test_or_star_expr | |||
| | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple | |||
| | test_or_star_expr "," -> tuple | |||
| // For normal and annotated assignments, additional restrictions enforced by the interpreter | |||
| del_stmt: "del" exprlist | |||
| pass_stmt: "pass" | |||
| ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt | |||
| break_stmt: "break" | |||
| continue_stmt: "continue" | |||
| return_stmt: "return" [testlist] | |||
| yield_stmt: yield_expr | |||
| raise_stmt: "raise" [test ["from" test]] | |||
| import_stmt: import_name | import_from | |||
| import_name: "import" dotted_as_names | |||
| // note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS | |||
| import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) | |||
| !dots: "."+ | |||
| import_as_name: NAME ["as" NAME] | |||
| dotted_as_name: dotted_name ["as" NAME] | |||
| import_as_names: import_as_name ("," import_as_name)* [","] | |||
| dotted_as_names: dotted_as_name ("," dotted_as_name)* | |||
| dotted_name: NAME ("." NAME)* | |||
| global_stmt: "global" NAME ("," NAME)* | |||
| nonlocal_stmt: "nonlocal" NAME ("," NAME)* | |||
| assert_stmt: "assert" test ["," test] | |||
| ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | |||
| async_stmt: "async" (funcdef | with_stmt | for_stmt) | |||
| if_stmt: "if" test ":" suite elifs ["else" ":" suite] | |||
| elifs: elif_* | |||
| elif_: "elif" test ":" suite | |||
| while_stmt: "while" test ":" suite ["else" ":" suite] | |||
| for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] | |||
| try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] | |||
| | "try" ":" suite finally -> try_finally | |||
| finally: "finally" ":" suite | |||
| except_clauses: except_clause+ | |||
| except_clause: "except" [test ["as" NAME]] ":" suite | |||
| with_stmt: "with" with_items ":" suite | |||
| with_items: with_item ("," with_item)* | |||
| with_item: test ["as" expr] | |||
| // NB compile.c makes sure that the default except clause is last | |||
| suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT | |||
| ?test: or_test ("if" or_test "else" test)? | |||
| | lambdef | |||
| ?test_nocond: or_test | lambdef_nocond | |||
| ?or_test: and_test ("or" and_test)* | |||
| ?and_test: not_test_ ("and" not_test_)* | |||
| ?not_test_: "not" not_test_ -> not_test | |||
| | comparison | |||
| ?comparison: expr (comp_op expr)* | |||
| star_expr: "*" expr | |||
| ?expr: or_expr | |||
| ?or_expr: xor_expr ("|" xor_expr)* | |||
| ?xor_expr: and_expr ("^" and_expr)* | |||
| ?and_expr: shift_expr ("&" shift_expr)* | |||
| ?shift_expr: arith_expr (_shift_op arith_expr)* | |||
| ?arith_expr: term (_add_op term)* | |||
| ?term: factor (_mul_op factor)* | |||
| ?factor: _unary_op factor | power | |||
| !_unary_op: "+"|"-"|"~" | |||
| !_add_op: "+"|"-" | |||
| !_shift_op: "<<"|">>" | |||
| !_mul_op: "*"|"@"|"/"|"%"|"//" | |||
| // <> isn't actually a valid comparison operator in Python. It's here for the | |||
| // sake of a __future__ import described in PEP 401 (which really works :-) | |||
| !comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" | |||
| ?power: await_expr ("**" factor)? | |||
| ?await_expr: AWAIT? atom_expr | |||
| AWAIT: "await" | |||
| ?atom_expr: atom_expr "(" [arguments] ")" -> funccall | |||
| | atom_expr "[" subscriptlist "]" -> getitem | |||
| | atom_expr "." NAME -> getattr | |||
| | atom | |||
| ?atom: "(" yield_expr ")" | |||
| | "(" _tuple_inner? ")" -> tuple | |||
| | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension | |||
| | "[" _testlist_comp? "]" -> list | |||
| | "[" comprehension{test_or_star_expr} "]" -> list_comprehension | |||
| | "{" _dict_exprlist? "}" -> dict | |||
| | "{" comprehension{key_value} "}" -> dict_comprehension | |||
| | "{" _set_exprlist "}" -> set | |||
| | "{" comprehension{test} "}" -> set_comprehension | |||
| | NAME -> var | |||
| | number | |||
| | string_concat | |||
| | "(" test ")" | |||
| | "..." -> ellipsis | |||
| | "None" -> const_none | |||
| | "True" -> const_true | |||
| | "False" -> const_false | |||
| ?string_concat: string+ | |||
| _testlist_comp: test | _tuple_inner | |||
| _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") | |||
| ?test_or_star_expr: test | |||
| | star_expr | |||
| ?subscriptlist: subscript | |||
| | subscript (("," subscript)+ [","] | ",") -> subscript_tuple | |||
| ?subscript: test | ([test] ":" [test] [sliceop]) -> slice | |||
| sliceop: ":" [test] | |||
| ?exprlist: (expr|star_expr) | |||
| | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") | |||
| ?testlist: test | testlist_tuple | |||
| testlist_tuple: test (("," test)+ [","] | ",") | |||
| _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] | |||
| key_value: test ":" test | |||
| _set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] | |||
| classdef: "class" NAME ["(" [arguments] ")"] ":" suite | |||
| arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? | |||
| | starargs | |||
| | kwargs | |||
| | comprehension{test} | |||
| starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] | |||
| stararg: "*" test | |||
| kwargs: "**" test | |||
| ?argvalue: test ("=" test)? | |||
| comprehension{comp_result}: comp_result comp_fors [comp_if] | |||
| comp_fors: comp_for+ | |||
| comp_for: [ASYNC] "for" exprlist "in" or_test | |||
| ASYNC: "async" | |||
| ?comp_if: "if" test_nocond | |||
| // not used in grammar, but may appear in "node" passed from Parser to Compiler | |||
| encoding_decl: NAME | |||
| yield_expr: "yield" [testlist] | |||
| | "yield" "from" test -> yield_from | |||
| number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER | |||
| string: STRING | LONG_STRING | |||
| // Import terminals from standard library (grammars/python.lark) | |||
| %import python (NAME, COMMENT, STRING, LONG_STRING) | |||
| %import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER) | |||
| // Other terminals | |||
| _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||
| %ignore /[\t \f]+/ // WS | |||
| %ignore /\\[\t \f]*\r?\n/ // LINE_CONT | |||
| %ignore COMMENT | |||
| %declare _INDENT _DEDENT | |||
| @@ -14,7 +14,6 @@ import glob, time | |||
| from lark import Lark | |||
| from lark.indenter import Indenter | |||
| # __path__ = os.path.dirname(__file__) | |||
| class PythonIndenter(Indenter): | |||
| NL_type = '_NEWLINE' | |||
| @@ -24,11 +23,14 @@ class PythonIndenter(Indenter): | |||
| DEDENT_type = '_DEDENT' | |||
| tab_len = 8 | |||
| kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') | |||
| kwargs = dict(postlex=PythonIndenter(), start='file_input') | |||
| python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) | |||
| python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) | |||
| python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='basic', **kwargs) | |||
| # Official Python grammar by Lark | |||
| python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], parser='lalr', **kwargs) | |||
| # Local Python2 grammar | |||
| python_parser2 = Lark.open('python2.lark', rel_to=__file__, parser='lalr', **kwargs) | |||
| python_parser2_earley = Lark.open('python2.lark', rel_to=__file__, parser='earley', lexer='basic', **kwargs) | |||
| try: | |||
| xrange | |||
| @@ -15,7 +15,6 @@ parser = lark.Lark.open(lark_path / 'grammars/lark.lark', rel_to=__file__, parse | |||
| grammar_files = [ | |||
| examples_path / 'advanced/python2.lark', | |||
| examples_path / 'advanced/python3.lark', | |||
| examples_path / 'relative-imports/multiples.lark', | |||
| examples_path / 'relative-imports/multiple2.lark', | |||
| examples_path / 'relative-imports/multiple3.lark', | |||
| @@ -1,3 +1,225 @@ | |||
| // Python 3 grammar for Lark | |||
| // This grammar should parse all python 3.x code successfully. | |||
| // Adapted from: https://docs.python.org/3/reference/grammar.html | |||
| // Start symbols for the grammar: | |||
| // single_input is a single interactive statement; | |||
| // file_input is a module or sequence of commands read from an input file; | |||
| // eval_input is the input for the eval() functions. | |||
| // NB: compound_stmt in single_input is followed by extra NEWLINE! | |||
| // | |||
| single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE | |||
| file_input: (_NEWLINE | stmt)* | |||
| eval_input: testlist _NEWLINE* | |||
| decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE | |||
| decorators: decorator+ | |||
| decorated: decorators (classdef | funcdef | async_funcdef) | |||
| async_funcdef: "async" funcdef | |||
| funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite | |||
| parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]] | |||
| | starparams | |||
| | kwparams | |||
| SLASH: "/" // Named terminal: otherwise the "/" would be filtered out of the tree and be indistinguishable in the result | |||
| starparams: "*" typedparam? ("," paramvalue)* ["," kwparams] | |||
| kwparams: "**" typedparam ","? | |||
| ?paramvalue: typedparam ("=" test)? | |||
| ?typedparam: NAME (":" test)? | |||
| lambdef: "lambda" [lambda_params] ":" test | |||
| lambdef_nocond: "lambda" [lambda_params] ":" test_nocond | |||
| lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] | |||
| | lambda_starparams | |||
| | lambda_kwparams | |||
| ?lambda_paramvalue: NAME ("=" test)? | |||
| lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]] | |||
| lambda_kwparams: "**" NAME ","? | |||
| ?stmt: simple_stmt | compound_stmt | |||
| ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE | |||
| ?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) | |||
| expr_stmt: testlist_star_expr | |||
| assign_stmt: annassign | augassign | assign | |||
| annassign: testlist_star_expr ":" test ["=" test] | |||
| assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ | |||
| augassign: testlist_star_expr augassign_op (yield_expr|testlist) | |||
| !augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" | |||
| ?testlist_star_expr: test_or_star_expr | |||
| | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple | |||
| | test_or_star_expr "," -> tuple | |||
| // For normal and annotated assignments, additional restrictions enforced by the interpreter | |||
| del_stmt: "del" exprlist | |||
| pass_stmt: "pass" | |||
| ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt | |||
| break_stmt: "break" | |||
| continue_stmt: "continue" | |||
| return_stmt: "return" [testlist] | |||
| yield_stmt: yield_expr | |||
| raise_stmt: "raise" [test ["from" test]] | |||
| import_stmt: import_name | import_from | |||
| import_name: "import" dotted_as_names | |||
| // note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS | |||
| import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) | |||
| !dots: "."+ | |||
| import_as_name: NAME ["as" NAME] | |||
| dotted_as_name: dotted_name ["as" NAME] | |||
| import_as_names: import_as_name ("," import_as_name)* [","] | |||
| dotted_as_names: dotted_as_name ("," dotted_as_name)* | |||
| dotted_name: NAME ("." NAME)* | |||
| global_stmt: "global" NAME ("," NAME)* | |||
| nonlocal_stmt: "nonlocal" NAME ("," NAME)* | |||
| assert_stmt: "assert" test ["," test] | |||
| ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | |||
| async_stmt: "async" (funcdef | with_stmt | for_stmt) | |||
| if_stmt: "if" test ":" suite elifs ["else" ":" suite] | |||
| elifs: elif_* | |||
| elif_: "elif" test ":" suite | |||
| while_stmt: "while" test ":" suite ["else" ":" suite] | |||
| for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] | |||
| try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] | |||
| | "try" ":" suite finally -> try_finally | |||
| finally: "finally" ":" suite | |||
| except_clauses: except_clause+ | |||
| except_clause: "except" [test ["as" NAME]] ":" suite | |||
| with_stmt: "with" with_items ":" suite | |||
| with_items: with_item ("," with_item)* | |||
| with_item: test ["as" expr] | |||
| // NB compile.c makes sure that the default except clause is last | |||
| suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT | |||
| ?test: or_test ("if" or_test "else" test)? | |||
| | lambdef | |||
| ?test_nocond: or_test | lambdef_nocond | |||
| ?or_test: and_test ("or" and_test)* | |||
| ?and_test: not_test_ ("and" not_test_)* | |||
| ?not_test_: "not" not_test_ -> not_test | |||
| | comparison | |||
| ?comparison: expr (comp_op expr)* | |||
| star_expr: "*" expr | |||
| ?expr: or_expr | |||
| ?or_expr: xor_expr ("|" xor_expr)* | |||
| ?xor_expr: and_expr ("^" and_expr)* | |||
| ?and_expr: shift_expr ("&" shift_expr)* | |||
| ?shift_expr: arith_expr (_shift_op arith_expr)* | |||
| ?arith_expr: term (_add_op term)* | |||
| ?term: factor (_mul_op factor)* | |||
| ?factor: _unary_op factor | power | |||
| !_unary_op: "+"|"-"|"~" | |||
| !_add_op: "+"|"-" | |||
| !_shift_op: "<<"|">>" | |||
| !_mul_op: "*"|"@"|"/"|"%"|"//" | |||
| // <> isn't actually a valid comparison operator in Python. It's here for the | |||
| // sake of a __future__ import described in PEP 401 (which really works :-) | |||
| !comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" | |||
| ?power: await_expr ("**" factor)? | |||
| ?await_expr: AWAIT? atom_expr | |||
| AWAIT: "await" | |||
| ?atom_expr: atom_expr "(" [arguments] ")" -> funccall | |||
| | atom_expr "[" subscriptlist "]" -> getitem | |||
| | atom_expr "." NAME -> getattr | |||
| | atom | |||
| ?atom: "(" yield_expr ")" | |||
| | "(" _tuple_inner? ")" -> tuple | |||
| | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension | |||
| | "[" _testlist_comp? "]" -> list | |||
| | "[" comprehension{test_or_star_expr} "]" -> list_comprehension | |||
| | "{" _dict_exprlist? "}" -> dict | |||
| | "{" comprehension{key_value} "}" -> dict_comprehension | |||
| | "{" _set_exprlist "}" -> set | |||
| | "{" comprehension{test} "}" -> set_comprehension | |||
| | NAME -> var | |||
| | number | |||
| | string_concat | |||
| | "(" test ")" | |||
| | "..." -> ellipsis | |||
| | "None" -> const_none | |||
| | "True" -> const_true | |||
| | "False" -> const_false | |||
| ?string_concat: string+ | |||
| _testlist_comp: test | _tuple_inner | |||
| _tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") | |||
| ?test_or_star_expr: test | |||
| | star_expr | |||
| ?subscriptlist: subscript | |||
| | subscript (("," subscript)+ [","] | ",") -> subscript_tuple | |||
| ?subscript: test | ([test] ":" [test] [sliceop]) -> slice | |||
| sliceop: ":" [test] | |||
| ?exprlist: (expr|star_expr) | |||
| | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") | |||
| ?testlist: test | testlist_tuple | |||
| testlist_tuple: test (("," test)+ [","] | ",") | |||
| _dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] | |||
| key_value: test ":" test | |||
| _set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] | |||
| classdef: "class" NAME ["(" [arguments] ")"] ":" suite | |||
| arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? | |||
| | starargs | |||
| | kwargs | |||
| | comprehension{test} | |||
| starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] | |||
| stararg: "*" test | |||
| kwargs: "**" test | |||
| ?argvalue: test ("=" test)? | |||
| comprehension{comp_result}: comp_result comp_fors [comp_if] | |||
| comp_fors: comp_for+ | |||
| comp_for: [ASYNC] "for" exprlist "in" or_test | |||
| ASYNC: "async" | |||
| ?comp_if: "if" test_nocond | |||
| // not used in grammar, but may appear in "node" passed from Parser to Compiler | |||
| encoding_decl: NAME | |||
| yield_expr: "yield" [testlist] | |||
| | "yield" "from" test -> yield_from | |||
| number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER | |||
| string: STRING | LONG_STRING | |||
| // Other terminals | |||
| _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||
| %ignore /[\t \f]+/ // WS | |||
| %ignore /\\[\t \f]*\r?\n/ // LINE_CONT | |||
| %ignore COMMENT | |||
| %declare _INDENT _DEDENT | |||
| // Python terminals | |||
| NAME: /[a-zA-Z_]\w*/ | |||
| @@ -820,15 +820,19 @@ class FromPackageLoader: | |||
| # Technically false, but FileNotFoundError doesn't exist in Python 2.7, and this message should never reach the end user anyway | |||
| raise IOError() | |||
| to_try = [base_path.path] | |||
| err = None | |||
| for path in to_try: | |||
| full_path = os.path.join(path, grammar_path) | |||
| try: | |||
| text = pkgutil.get_data(self.pkg_name, full_path) | |||
| except IOError: | |||
| text: Optional[str] = pkgutil.get_data(self.pkg_name, full_path) | |||
| except IOError as e: | |||
| err = e | |||
| continue | |||
| else: | |||
| return PackageResource(self.pkg_name, full_path), text.decode() | |||
| raise IOError() | |||
| return PackageResource(self.pkg_name, full_path), (text.decode() if text else '') | |||
| raise IOError('Cannot find grammar in given paths') from err | |||
| stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS) | |||