|
- // Python 2 grammar for Lark
-
- // NOTE: Work in progress (TODO).
- // This grammar is intended to parse all Python 2.x code successfully,
- // but the resulting parse tree is not yet well organized.
-
- // Adapted from: https://docs.python.org/2/reference/grammar.html
- // Adapted by: Erez Shinan
-
- // Start symbols for the grammar:
- // single_input is a single interactive statement;
- // file_input is a module or sequence of commands read from an input file;
- // eval_input is the input for the eval() and input() functions.
- // NB: compound_stmt in single_input is followed by extra _NEWLINE!
- // Entry point for one interactive statement: an empty line, a simple
- // statement, or a compound statement followed by an extra _NEWLINE.
- single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE
- // Entry point for a whole module: any mix of blank lines and statements.
- // The leading "?" asks Lark to inline this rule when it has a single child.
- ?file_input: (_NEWLINE | stmt)*
- // Entry point for an expression list; the trailing _NEWLINE is optional.
- eval_input: testlist _NEWLINE?
-
- // One decorator line: "@", a dotted name, an optional parenthesised
- // (possibly empty) argument list, then a newline.
- decorator: "@" dotted_name [ "(" [arglist] ")" ] _NEWLINE
- // One or more decorator lines stacked above a definition.
- decorators: decorator+
- // A decorated class or function definition.
- decorated: decorators (classdef | funcdef)
- // Function definition header followed by its body (suite).
- funcdef: "def" NAME "(" parameters ")" ":" suite
- // Wrapper node around the optional parameter list.
- parameters: [paramlist]
- // Formal parameters. Three shapes are accepted:
- //   1. ordinary params, optionally followed by a comma and then nothing
- //      (trailing comma), "*name" (optionally with "**name"), or "**name";
- //   2. "*name" optionally followed by "**name";
- //   3. "**name" alone.
- paramlist: param ("," param)* ["," [star_params ["," kw_params] | kw_params]]
- | star_params ["," kw_params]
- | kw_params
- // Variadic positional parameter.
- star_params: "*" NAME
- // Variadic keyword parameter.
- kw_params: "**" NAME
- // A single parameter with an optional default value.
- param: fpdef ["=" test]
- // A parameter is a plain name or a parenthesised (tuple-unpacking) list.
- fpdef: NAME | "(" fplist ")"
- // Comma-separated fpdefs with an optional trailing comma.
- fplist: fpdef ("," fpdef)* [","]
-
- // Any statement. "?" inlines the rule when it has a single child, so the
- // concrete statement node appears directly in the tree.
- ?stmt: simple_stmt | compound_stmt
- // One logical line: small statements separated by ";" (a trailing ";" is
- // allowed), terminated by a newline.
- ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE
- // The statement kinds that may share a line.
- ?small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt
- | import_stmt | global_stmt | exec_stmt | assert_stmt)
- // Expression statement. The "->" aliases rename the resulting tree node:
- //   augassign2 - augmented assignment (target, operator, value)
- //   assign     - one or more chained "=" assignments
- // The last alternative (a bare testlist) keeps the rule name expr_stmt.
- expr_stmt: testlist augassign (yield_expr|testlist) -> augassign2
- | testlist ("=" (yield_expr|testlist))+ -> assign
- | testlist
-
- // The augmented-assignment operators.
- augassign: ("+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=")
- // Note carried over from the reference grammar (it concerns assignments, not
- // print): for normal assignments, additional restrictions are enforced by
- // the interpreter rather than by this grammar.
- // print statement: either a (possibly empty) comma-separated list of values
- // with optional trailing comma, or the ">>" form with a target expression
- // followed by optional values.
- print_stmt: "print" ( [ test ("," test)* [","] ] | ">>" test [ ("," test)+ [","] ] )
- del_stmt: "del" exprlist
- pass_stmt: "pass"
- // Control-flow statements; "?" inlines to the concrete statement node.
- ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
- break_stmt: "break"
- continue_stmt: "continue"
- // return with an optional value list.
- return_stmt: "return" [testlist]
- // A yield expression used as a statement.
- yield_stmt: yield_expr
- // raise with zero to three comma-separated expressions.
- raise_stmt: "raise" [test ["," test ["," test]]]
- import_stmt: import_name | import_from
- // "import a.b as c, d"
- import_name: "import" dotted_as_names
- // "from <module> import <names>": the module is a dotted name with any
- // number of leading dots, or dots alone; the names are "*", a parenthesised
- // list, or a bare list.
- import_from: "from" ("."* dotted_name | "."+) "import" ("*" | "(" import_as_names ")" | import_as_names)
- // Imported name with optional alias; inlined when no alias is given.
- ?import_as_name: NAME ["as" NAME]
- // Imported dotted module path with optional alias; inlined when no alias.
- ?dotted_as_name: dotted_name ["as" NAME]
- // Comma-separated imported names; a trailing comma is allowed.
- import_as_names: import_as_name ("," import_as_name)* [","]
- // Comma-separated module paths; no trailing comma.
- dotted_as_names: dotted_as_name ("," dotted_as_name)*
- // One or more names joined by ".".
- dotted_name: NAME ("." NAME)*
- global_stmt: "global" NAME ("," NAME)*
- // exec statement with optional "in" clause of one or two expressions.
- exec_stmt: "exec" expr ["in" test ["," test]]
- // assert with an optional second expression.
- assert_stmt: "assert" test ["," test]
-
- // Statements that own an indented body; "?" inlines to the concrete node.
- ?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
- // if / elif* / optional else.
- if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite]
- // while loop with optional else clause.
- while_stmt: "while" test ":" suite ["else" ":" suite]
- // for loop with optional else clause.
- for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
- // try statement: either one or more except clauses (then optional else and
- // optional finally), or a lone finally clause.
- try_stmt: ("try" ":" suite ((except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite] | "finally" ":" suite))
- // with statement over one or more comma-separated items.
- with_stmt: "with" with_item ("," with_item)* ":" suite
- // Context expression with optional "as" target.
- with_item: test ["as" expr]
- // NB compile.c makes sure that the default except clause is last
- // (i.e. this grammar itself does not enforce that ordering).
- // except header: bare, with an exception expression, or with an expression
- // plus a target introduced by "as" or ",".
- except_clause: "except" [test [("as" | ",") test]]
- // A body: a simple statement on the same line, or an indented block of one
- // or more statements delimited by the _INDENT/_DEDENT terminals.
- suite: simple_stmt | _NEWLINE _INDENT _NEWLINE? stmt+ _DEDENT _NEWLINE?
-
- // Backward compatibility cruft to support:
- // [ x for x in lambda: True, lambda: False if x() ]
- // even while also allowing:
- // lambda x: 5 if x else 2
- // (But not a mix of the two)
- // Expression list built from old_test items: a single item, or two or more
- // comma-separated items with an optional trailing comma.
- testlist_safe: old_test [("," old_test)+ [","]]
- // An expression without the conditional ("x if c else y") form.
- old_test: or_test | old_lambdef
- // lambda whose body is an old_test.
- old_lambdef: "lambda" [paramlist] ":" old_test
-
- // Expression grammar, from lowest to highest precedence. Every rule here is
- // prefixed with "?" so a level that has only one child is inlined and does
- // not add a wrapper node to the tree.
- // Conditional expression or lambda.
- ?test: or_test ["if" or_test "else" test] | lambdef
- ?or_test: and_test ("or" and_test)*
- ?and_test: not_test ("and" not_test)*
- ?not_test: "not" not_test | comparison
- // Chained comparisons: expr (op expr)*.
- ?comparison: expr (comp_op expr)*
- // Comparison operators, including the two-word forms "not in" and "is not"
- // and the Python 2 "<>" spelling of inequality.
- comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
- // Bitwise or / xor / and.
- ?expr: xor_expr ("|" xor_expr)*
- ?xor_expr: and_expr ("^" and_expr)*
- ?and_expr: shift_expr ("&" shift_expr)*
- ?shift_expr: arith_expr (("<<"|">>") arith_expr)*
- ?arith_expr: term (("+"|"-") term)*
- ?term: factor (("*"|"/"|"%"|"//") factor)*
- // Unary prefix operators (right-recursive).
- ?factor: ("+"|"-"|"~") factor | power
- // Exponentiation; the right operand is a factor, so it may carry a unary
- // prefix operator.
- ?power: molecule ["**" factor]
- // _trailer: "(" [arglist] ")" | "[" subscriptlist "]" | "." NAME
- // Postfix ("trailer") expressions, written left-recursively so that chained
- // trailers such as a.b(c)[d] nest from left to right.
- // The "->" aliases name the resulting tree nodes: func_call, getitem, getattr.
- // The subscript list is mandatory, matching the reference grammar's trailer
- // rule ('[' subscriptlist ']'): an empty subscript "x[]" is not valid Python.
- ?molecule: molecule "(" [arglist] ")" -> func_call
- | molecule "[" subscriptlist "]" -> getitem
- | molecule "." NAME -> getattr
- | atom
- // Primary expressions: parenthesised form (optionally empty, a yield
- // expression, or a tuple/generator body), list display, dict/set display,
- // backquoted expression list, a name, a number, or one or more adjacent
- // string literals. "?" inlines the rule when it has a single child.
- ?atom: "(" [yield_expr|testlist_comp] ")"
- | "[" [listmaker] "]"
- | "{" [dictorsetmaker] "}"
- | "`" testlist1 "`"
- | NAME | number | string+
- // Contents of a list display: a list comprehension (test + list_for) or a
- // comma-separated list of expressions with optional trailing comma.
- listmaker: test ( list_for | ("," test)* [","] )
- // Contents of parentheses: a generator expression (test + comp_for) or a
- // comma-separated list of expressions; inlined when it has a single child.
- ?testlist_comp: test ( comp_for | ("," test)* [","] )
- // lambda with optional parameters and a full "test" body.
- lambdef: "lambda" [paramlist] ":" test
- // Comma-separated subscripts with optional trailing comma.
- ?subscriptlist: subscript ("," subscript)* [","]
- // One subscript: an ellipsis written as three "." tokens, a plain
- // expression, or a slice with optional bounds and optional step (sliceop).
- subscript: "." "." "." | test | [test] ":" [test] [sliceop]
- // Slice step part: ":" followed by an optional expression.
- sliceop: ":" [test]
- // Comma-separated "expr" items (no comparisons/boolean operators at the
- // top level); optional trailing comma.
- ?exprlist: expr ("," expr)* [","]
- // Comma-separated "test" items; optional trailing comma.
- ?testlist: test ("," test)* [","]
- // Contents of braces: key:value pairs (dict display or dict comprehension)
- // or plain expressions (set display or set comprehension).
- dictorsetmaker: ( (test ":" test (comp_for | ("," test ":" test)* [","])) | (test (comp_for | ("," test)* [","])) )
-
- // Class definition: optional parenthesised (possibly empty) base list,
- // then the class body.
- classdef: "class" NAME ["(" [testlist] ")"] ":" suite
-
- // Call arguments: zero or more comma-terminated arguments followed by one of
- //   * a final argument with an optional trailing comma,
- //   * "*" star_args, optionally followed by further arguments and then by
- //     "**" kw_args, e.g. f(*args, key=value, **kwargs),
- //   * "**" kw_args alone.
- // Arguments after star_args are permitted by the reference grammar:
- //   '*' test (',' argument)* [',' '**' test]
- arglist: (argument ",")* (argument [","]
- | star_args ("," argument)* ["," kw_args]
- | kw_args)
-
- // "*expr" in a call: unpacked positional arguments.
- star_args: "*" test
- // "**expr" in a call: unpacked keyword arguments.
- kw_args: "**" test
-
-
- // A single call argument: a positional expression (optionally a bare
- // generator expression via comp_for) or a keyword argument "name=value".
- // The reason that keywords are test nodes instead of NAME is that using NAME
- // results in an ambiguity. ast.c makes sure it's a NAME.
- argument: test [comp_for] | test "=" test
-
- // Clauses of a list comprehension. Each "for"/"if" clause may be followed
- // by a further clause through list_iter.
- list_iter: list_for | list_if
- // The iterable is a testlist_safe, i.e. it may be an unparenthesised
- // comma-separated list of old_test items.
- list_for: "for" exprlist "in" testlist_safe [list_iter]
- list_if: "if" old_test [list_iter]
-
- // Clauses of a generator expression / set or dict comprehension. Each
- // "for"/"if" clause may be followed by a further clause through comp_iter.
- comp_iter: comp_for | comp_if
- // The iterable here is a single or_test.
- comp_for: "for" exprlist "in" or_test [comp_iter]
- comp_if: "if" old_test [comp_iter]
-
- // Comma-separated expressions without a trailing comma; used by the
- // backquote form in atom.
- testlist1: test ("," test)*
-
- // yield with an optional value list.
- yield_expr: "yield" [testlist]
-
- // Numeric literal: wraps whichever numeric terminal matched.
- // NOTE(review): no terminal for binary literals (0b...) is listed here or
- // defined in the visible token section - confirm whether that is intended.
- number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT | IMAG_NUMBER
- // String literal: single-line or triple-quoted.
- string: STRING | LONG_STRING
- // Tokens
-
- // A comment runs from "#" to the end of the line (newline not included).
- COMMENT: /\#[^\n]*/
- // A logical newline: one or more line breaks (each swallowing the tabs and
- // spaces that follow it) and/or comments, collapsed into a single token.
- // The leading "_" makes Lark filter the token out of the parse tree.
- _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
-
- // Input the lexer discards: horizontal whitespace, backslash line
- // continuations, and comments.
- // NOTE(review): the backslashes in the LINE_CONT pattern appear doubly
- // escaped; confirm this matches how the grammar loader unescapes patterns.
- %ignore /[\t \f]+/ // WS
- %ignore /\\\\[\t \f]*\r?\n/ // LINE_CONT
- %ignore COMMENT
-
-
- // Single-line string literal in single or double quotes, with an optional
- // case-insensitive u/b prefix followed by an optional r prefix. The
- // (?!"") / (?!'') look-aheads stop the opening quote from matching the
- // start of a triple-quoted string. The look-behind plus backslash-pair
- // group is presumably there so that an escaped quote does not end the
- // literal - NOTE(review): the backslashes appear doubly escaped; confirm
- // against the grammar loader's unescaping.
- STRING : /(?i)[ub]?r?("(?!"").*?(?<!\\\\)(\\\\\\\\)*?"|'(?!'').*?(?<!\\\\)(\\\\\\\\)*?')/
- // Triple-quoted string literal; (?s) lets "." match newlines so the literal
- // can span several lines. Same prefixes as STRING.
- LONG_STRING: /(?i)(?s)[ub]?r?(""".*?(?<!\\\\)(\\\\\\\\)*?"""|'''.*?(?<!\\\\)(\\\\\\\\)*?''')/
-
- // Numeric terminals. (?i) makes each pattern case-insensitive; the optional
- // trailing "l" is the long-integer suffix.
- // Decimal integer with a non-zero leading digit (a lone "0" is matched by
- // OCT_NUMBER instead).
- DEC_NUMBER: /(?i)[1-9]\d*l?/
- // "0x" followed by hex digits. The "*" quantifier also accepts a bare "0x".
- HEX_NUMBER: /(?i)0x[\da-f]*l?/
- // "0", an optional "o", then octal digits; also matches a plain "0".
- OCT_NUMBER: /(?i)0o?[0-7]*l?/
- // Terminals taken from Lark's bundled "common" grammar, renamed via "->".
- // _INT begins with "_", so it is filtered out of the tree if it appears.
- %import common.FLOAT -> FLOAT
- %import common.INT -> _INT
- %import common.CNAME -> NAME
- // Imaginary literal: an integer or float followed by "j" or "J".
- IMAG_NUMBER: (_INT | FLOAT) ("j"|"J")
-
- // Placeholder terminals marking a decrease / increase in indentation; they
- // are used by the suite rule. Presumably they are emitted by an
- // indentation-tracking post-lexer rather than matched literally in the
- // source text - TODO confirm against the parser setup.
- _DEDENT: "<DEDENT>"
- _INDENT: "<INDENT>"
|