// Python 3 grammar for Lark

// This grammar should parse all python 3.x code successfully.

// Adapted from: https://docs.python.org/3/reference/grammar.html
// Adapted by: Erez Shinan

// Start symbols for the grammar:
//       single_input is a single interactive statement;
//       file_input is a module or sequence of commands read from an input file;
//       eval_input is the input for the eval() functions.
// NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
file_input: (_NEWLINE | stmt)*
eval_input: testlist _NEWLINE*

// Decorators and decorated definitions
decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

// Function definitions
async_funcdef: "async" funcdef
funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite

// Ordinary parameters may follow the positional-only marker,
// e.g. "def f(a, /, b, *args, **kwargs)".
parameters: paramvalue ("," paramvalue)* ["," SLASH ("," paramvalue)*] ["," [starparams | kwparams]]
          | starparams
          | kwparams
SLASH: "/" // Otherwise it will completely disappear and be indistinguishable in the result
starparams: "*" typedparam? ("," paramvalue)* ["," kwparams]
kwparams: "**" typedparam ","?

// A parameter is a name with an optional annotation and an optional default value
?paramvalue: typedparam ("=" test)?
?typedparam: NAME (":" test)?

// Lambda expressions (lambda parameters take no annotations)
lambdef: "lambda" [lambda_params] ":" test
lambdef_nocond: "lambda" [lambda_params] ":" test_nocond
lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]]
             | lambda_starparams
             | lambda_kwparams
?lambda_paramvalue: NAME ("=" test)?
lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]]
lambda_kwparams: "**" NAME ","?

// ---- Simple statements ----

?stmt: simple_stmt | compound_stmt
?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr
assign_stmt: annassign | augassign | assign

annassign: testlist_star_expr ":" test ["=" test]
assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+
augassign: testlist_star_expr augassign_op (yield_expr|testlist)
!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
?testlist_star_expr: test_or_star_expr
                   | test_or_star_expr ("," test_or_star_expr)+ ","?  -> tuple
                   | test_or_star_expr ","  -> tuple

// For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: "del" exprlist
pass_stmt: "pass"
?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: "break"
continue_stmt: "continue"
return_stmt: "return" [testlist]
yield_stmt: yield_expr
raise_stmt: "raise" [test ["from" test]]
import_stmt: import_name | import_from
import_name: "import" dotted_as_names
// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS
// NOTE(review): the rule below now collects the leading dots through "dots"
// rather than ("." | "..."); confirm the note above is still accurate.
import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)
!dots: "."+
import_as_name: NAME ["as" NAME]
dotted_as_name: dotted_name ["as" NAME]
import_as_names: import_as_name ("," import_as_name)* [","]
dotted_as_names: dotted_as_name ("," dotted_as_name)*
dotted_name: NAME ("." NAME)*
global_stmt: "global" NAME ("," NAME)*
nonlocal_stmt: "nonlocal" NAME ("," NAME)*
assert_stmt: "assert" test ["," test]

// ---- Compound statements ----

?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: "async" (funcdef | with_stmt | for_stmt)
if_stmt: "if" test ":" suite elifs ["else" ":" suite]
elifs: elif_*
elif_: "elif" test ":" suite
while_stmt: "while" test ":" suite ["else" ":" suite]
for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally]
        | "try" ":" suite finally   -> try_finally
finally: "finally" ":" suite
except_clauses: except_clause+
except_clause: "except" [test ["as" NAME]] ":" suite
// NB compile.c makes sure that the default except clause is last

with_stmt: "with" with_items ":" suite
with_items: with_item ("," with_item)*
with_item: test ["as" expr]

// A suite is either a simple statement on the same line or an indented block
suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT

// ---- Expressions, from lowest to highest precedence ----

?test: or_test ("if" or_test "else" test)?
     | lambdef
?test_nocond: or_test | lambdef_nocond

?or_test: and_test ("or" and_test)*
?and_test: not_test_ ("and" not_test_)*
?not_test_: "not" not_test_ -> not_test
          | comparison
?comparison: expr (comp_op expr)*
star_expr: "*" expr

?expr: or_expr
?or_expr: xor_expr ("|" xor_expr)*
?xor_expr: and_expr ("^" and_expr)*
?and_expr: shift_expr ("&" shift_expr)*
?shift_expr: arith_expr (_shift_op arith_expr)*
?arith_expr: term (_add_op term)*
?term: factor (_mul_op factor)*
?factor: _unary_op factor | power

!_unary_op: "+"|"-"|"~"
!_add_op: "+"|"-"
!_shift_op: "<<"|">>"
!_mul_op: "*"|"@"|"/"|"%"|"//"
// <> isn't actually a valid comparison operator in Python. It's here for the
// sake of a __future__ import described in PEP 401 (which really works :-)
!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"

?power: await_expr ("**" factor)?
?await_expr: AWAIT? atom_expr
AWAIT: "await"

// Trailers: call, subscription and attribute access (left-recursive)
?atom_expr: atom_expr "(" [arguments] ")"      -> funccall
          | atom_expr "[" subscriptlist "]"  -> getitem
          | atom_expr "." NAME               -> getattr
          | atom

?atom: "(" yield_expr ")"
     | "(" _tuple_inner? ")" -> tuple
     | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension
     | "[" _testlist_comp? "]"  -> list
     | "[" comprehension{test_or_star_expr} "]"  -> list_comprehension
     | "{" _dict_exprlist? "}" -> dict
     | "{" comprehension{key_value} "}" -> dict_comprehension
     | "{" _set_exprlist "}" -> set
     | "{" comprehension{test} "}" -> set_comprehension
     | NAME -> var
     | number
     | string_concat
     | "(" test ")"
     | "..." -> ellipsis
     | "None"    -> const_none
     | "True"    -> const_true
     | "False"   -> const_false

?string_concat: string+

// Contents of a list display. Unlike a parenthesized tuple, a list may hold a
// single starred item without a trailing comma, e.g. "[*a]".
_testlist_comp: test_or_star_expr ("," test_or_star_expr)* [","]
// Contents of a parenthesized tuple: at least one comma is required
_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",")

?test_or_star_expr: test
                  | star_expr

?subscriptlist: subscript
              | subscript (("," subscript)+ [","] | ",") -> subscript_tuple
?subscript: test | ([test] ":" [test] [sliceop]) -> slice
sliceop: ":" [test]
?exprlist: (expr|star_expr)
         | (expr|star_expr) (("," (expr|star_expr))+ [","]|",")
?testlist: test | testlist_tuple
testlist_tuple: test (("," test)+ [","] | ",")
_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","]

key_value: test ":" test

_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","]

classdef: "class" NAME ["(" [arguments] ")"] ":" suite

// ---- Call arguments ----

arguments: argvalue ("," argvalue)*  ("," [ starargs | kwargs])?
         | starargs
         | kwargs
         | comprehension{test}

starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs]
stararg: "*" test
// Keyword arguments may still follow a "**" unpacking, e.g. "f(**opts, flag=True)"
kwargs: "**" test ("," argvalue)*

?argvalue: test ("=" test)?

// ---- Comprehensions ----
// Template rule: comp_result is the element expression supplied at each use site.
comprehension{comp_result}: comp_result comp_fors [comp_if]
comp_fors: comp_for+
comp_for: [ASYNC] "for" exprlist "in" or_test
ASYNC: "async"
?comp_if: "if" test_nocond

// not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

// ---- Yield expressions ----
yield_expr: "yield" [testlist]
          | "yield" "from" test -> yield_from

// ---- Literals ----
number: DEC_NUMBER
      | HEX_NUMBER
      | BIN_NUMBER
      | OCT_NUMBER
      | FLOAT_NUMBER
      | IMAG_NUMBER
string: STRING
      | LONG_STRING

// Import terminals from standard library (grammars/python.lark)
%import python (NAME, COMMENT, STRING, LONG_STRING)
%import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER)

// Other terminals
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%ignore /[\t \f]+/  // WS
%ignore /\\[\t \f]*\r?\n/   // LINE_CONT
%ignore COMMENT
%declare _INDENT _DEDENT