  1. // Python 2 grammar for Lark
  2. // NOTE: Work in progress!!! (XXX TODO)
  3. // This grammar should parse all python 2.x code successfully,
  4. // but the resulting parse-tree is still not well-organized.
  // Adapted from: https://docs.python.org/2/reference/grammar.html
  6. // Adapted by: Erez Shinan
  7. // Start symbols for the grammar:
  8. // single_input is a single interactive statement;
  9. // file_input is a module or sequence of commands read from an input file;
  10. // eval_input is the input for the eval() and input() functions.
  11. // NB: compound_stmt in single_input is followed by extra _NEWLINE!
  // Interactive entry point: a blank line, one simple statement, or one
  // compound statement followed by an extra _NEWLINE.
  single_input: _NEWLINE
              | simple_stmt
              | compound_stmt _NEWLINE

  // Module entry point: any interleaving of blank lines and statements.
  ?file_input: (_NEWLINE | stmt)*

  // Expression entry point: an expression list with an optional trailing _NEWLINE.
  eval_input: testlist _NEWLINE?
  // A single "@dotted.name" line, optionally followed by a parenthesised
  // (possibly empty) argument list, terminated by _NEWLINE.
  decorator: "@" dotted_name ["(" [arglist] ")"] _NEWLINE

  // One or more decorator lines stacked on top of each other.
  decorators: decorator+

  // A class or function definition preceded by its decorators.
  decorated: decorators (classdef | funcdef)
  // Function definition header plus its body.
  funcdef: "def" NAME "(" parameters ")" ":" suite

  // The (possibly empty) content between the parentheses of a "def".
  parameters: [paramlist]

  // Three shapes, in this order:
  //   1. ordinary params, optionally followed by a comma and "*"/"**" params
  //   2. "*" param, optionally followed by a "**" param
  //   3. "**" param only
  paramlist: param ("," param)* ["," [star_params ["," kw_params] | kw_params]]
           | star_params ["," kw_params]
           | kw_params

  star_params: "*" NAME
  kw_params: "**" NAME

  // A parameter with an optional default value.
  param: fpdef ["=" test]

  // A parameter is a plain name or a parenthesised (nestable) list of fpdefs.
  fpdef: NAME
       | "(" fplist ")"
  fplist: fpdef ("," fpdef)* [","]
  // Any statement: single-line ("simple") or block-introducing ("compound").
  ?stmt: simple_stmt
       | compound_stmt

  // One or more ";"-separated small statements on a line, with an optional
  // trailing ";", terminated by _NEWLINE.
  ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE

  // The statement kinds that may appear inside a simple_stmt.
  ?small_stmt: expr_stmt
             | print_stmt
             | del_stmt
             | pass_stmt
             | flow_stmt
             | import_stmt
             | global_stmt
             | exec_stmt
             | assert_stmt
  // Expression statement. The first two alternatives are aliased:
  //   augassign2 - augmented assignment (target, operator, value)
  //   assign     - one or more chained "=" assignments
  // The last alternative is a bare expression list.
  expr_stmt: testlist augassign (yield_expr | testlist) -> augassign2
           | testlist ("=" (yield_expr | testlist))+ -> assign
           | testlist

  // The augmented-assignment operators.
  augassign: "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^="
           | "<<=" | ">>=" | "**=" | "//="
  36. // For normal assignments, additional restrictions enforced by the interpreter
  // "print" followed by either an optional expression list (trailing comma
  // allowed) or the ">>" form: one expression, then optionally more.
  print_stmt: "print" ( [test ("," test)* [","]]
                      | ">>" test [("," test)+ [","]] )
  del_stmt: "del" exprlist
  pass_stmt: "pass"

  // The statement kinds grouped under flow_stmt.
  ?flow_stmt: break_stmt
            | continue_stmt
            | return_stmt
            | raise_stmt
            | yield_stmt
  break_stmt: "break"
  continue_stmt: "continue"
  return_stmt: "return" [testlist]
  yield_stmt: yield_expr

  // "raise" takes zero to three comma-separated expressions.
  raise_stmt: "raise" [test ["," test ["," test]]]
  // Either "import a.b, c as d" or "from x import ...".
  import_stmt: import_name
             | import_from
  import_name: "import" dotted_as_names

  // The source is a dotted name with any number of leading dots, or dots only.
  // The imported names are "*", a parenthesised name list, or a bare name list.
  import_from: "from" ("."* dotted_name | "."+) "import" ("*" | "(" import_as_names ")" | import_as_names)

  // A name with an optional "as" alias.
  ?import_as_name: NAME ["as" NAME]
  ?dotted_as_name: dotted_name ["as" NAME]

  // Comma-separated lists; only import_as_names permits a trailing comma.
  import_as_names: import_as_name ("," import_as_name)* [","]
  dotted_as_names: dotted_as_name ("," dotted_as_name)*

  // One or more NAMEs joined by ".".
  dotted_name: NAME ("." NAME)*
  // "global" followed by one or more comma-separated names.
  global_stmt: "global" NAME ("," NAME)*

  // "exec" with an expression and an optional "in" clause of one or two expressions.
  exec_stmt: "exec" expr ["in" test ["," test]]

  // "assert" with a condition and an optional second expression.
  assert_stmt: "assert" test ["," test]
  // The statement kinds that introduce an indented suite.
  ?compound_stmt: if_stmt
                | while_stmt
                | for_stmt
                | try_stmt
                | with_stmt
                | funcdef
                | classdef
                | decorated

  // "if" with any number of "elif" branches and an optional "else".
  if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite]

  // Loops; both accept an optional "else" suite.
  while_stmt: "while" test ":" suite ["else" ":" suite]
  for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]

  // "try" followed by either one or more except clauses (then optional "else"
  // and optional "finally"), or by "finally" alone.
  try_stmt: "try" ":" suite ( (except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite]
                            | "finally" ":" suite )

  // "with" over one or more comma-separated items, each optionally bound with "as".
  with_stmt: "with" with_item ("," with_item)* ":" suite
  with_item: test ["as" expr]
  64. // NB compile.c makes sure that the default except clause is last
  // "except", optionally with an expression, optionally followed by a second
  // expression introduced by either "as" or ",".
  except_clause: "except" [test [("as" | ",") test]]

  // A block body: a simple statement on the same line, or an indented run of
  // one or more statements delimited by _INDENT / _DEDENT.
  suite: simple_stmt
       | _NEWLINE _INDENT _NEWLINE? stmt+ _DEDENT _NEWLINE?
  67. // Backward compatibility cruft to support:
  68. // [ x for x in lambda: True, lambda: False if x() ]
  69. // even while also allowing:
  70. // lambda x: 5 if x else 2
  71. // (But not a mix of the two)
  // One old_test, or two or more separated by commas (trailing comma allowed
  // only in the multi-element form).
  testlist_safe: old_test [("," old_test)+ [","]]

  // An expression without the "x if c else y" form.
  old_test: or_test
          | old_lambdef

  // A lambda whose body is an old_test.
  old_lambdef: "lambda" [paramlist] ":" old_test
  // An expression: an or_test with an optional "if ... else ..." tail, or a lambda.
  ?test: or_test ["if" or_test "else" test]
       | lambdef

  // Boolean operators, from loosest to tightest binding.
  ?or_test: and_test ("or" and_test)*
  ?and_test: not_test ("and" not_test)*
  ?not_test: "not" not_test
           | comparison

  // A chain of comparisons, e.g. a < b <= c.
  ?comparison: expr (comp_op expr)*

  // The comparison operators, including the two-word forms.
  comp_op: "<" | ">" | "==" | ">=" | "<=" | "<>" | "!="
         | "in" | "not" "in"
         | "is" | "is" "not"
  // Bitwise operators: "|" is the outermost level, then "^", then "&".
  ?expr: xor_expr ("|" xor_expr)*
  ?xor_expr: and_expr ("^" and_expr)*
  ?and_expr: shift_expr ("&" shift_expr)*

  // Shift, additive and multiplicative levels.
  ?shift_expr: arith_expr (("<<" | ">>") arith_expr)*
  ?arith_expr: term (("+" | "-") term)*
  ?term: factor (("*" | "/" | "%" | "//") factor)*

  // Unary prefix operators, which may be stacked.
  ?factor: ("+" | "-" | "~") factor
         | power

  // Exponentiation: the right-hand operand is a factor.
  ?power: molecule ["**" factor]
  89. // _trailer: "(" [arglist] ")" | "[" subscriptlist "]" | "." NAME
  // An atom followed by any number of trailers. The rule is left-recursive so
  // trailers nest left to right, and each trailer kind is aliased:
  //   func_call - call with an optional argument list
  //   getitem   - subscription / slicing
  //   getattr   - attribute access
  // The subscript list is mandatory: Python has no empty subscription "a[]".
  ?molecule: molecule "(" [arglist] ")" -> func_call
           | molecule "[" subscriptlist "]" -> getitem
           | molecule "." NAME -> getattr
           | atom
  // The innermost expression forms, in this order: parenthesised
  // yield/tuple/generator (aliased "tuple"), list display, dict-or-set display,
  // backquoted expression list, a single parenthesised expression, a name, a
  // number, or one or more adjacent string literals.
  ?atom: "(" [yield_expr | testlist_comp] ")" -> tuple
       | "[" [listmaker] "]"
       | "{" [dictorsetmaker] "}"
       | "`" testlist1 "`"
       | "(" test ")"
       | NAME
       | number
       | string+
  // Contents of "[...]": a list comprehension or a comma-separated list.
  listmaker: test (list_for | ("," test)* [","])

  // Contents of "(...)" beyond a single bare expression: a generator
  // expression, two or more elements, or one element with a trailing comma.
  ?testlist_comp: test (comp_for | ("," test)+ [","] | ",")

  lambdef: "lambda" [paramlist] ":" test

  // Subscripts inside "[...]" after an expression.
  ?subscriptlist: subscript ("," subscript)* [","]
  subscript: "." "." "."
           | test
           | [test] ":" [test] [sliceop]
  sliceop: ":" [test]

  // Comma-separated lists with an optional trailing comma.
  ?exprlist: expr ("," expr)* [","]
  ?testlist: test ("," test)* [","]

  // Contents of "{...}": key/value pairs (dict) or single values (set);
  // each form is either a comprehension or a comma-separated list.
  dictorsetmaker: test ":" test (comp_for | ("," test ":" test)* [","])
                | test (comp_for | ("," test)* [","])

  // Class definition with optional parenthesised (possibly empty) base list.
  classdef: "class" NAME ["(" [testlist] ")"] ":" suite
  // Call arguments: any number of comma-terminated arguments, then one of
  //   - a final argument with an optional trailing comma,
  //   - "*" args, optionally followed by further arguments and "**" kwargs,
  //   - "**" kwargs alone.
  // Arguments may follow "*args" (e.g. f(*a, key=1)). The grammar does not
  // check that they are keyword arguments, just as it does not check that
  // the left-hand side of "=" in `argument` is a NAME.
  arglist: (argument ",")* ( argument [","]
                           | star_args ("," argument)* ["," kw_args]
                           | kw_args )
  star_args: "*" test
  kw_args: "**" test
  115. // The reason that keywords are test nodes instead of NAME is that using NAME
  116. // results in an ambiguity. ast.c makes sure it's a NAME.
  // A positional argument (optionally a bare generator expression) or a
  // keyword argument.
  argument: test [comp_for]
          | test "=" test

  // List-comprehension clauses (old-style: conditions are old_test, the
  // iterable is a testlist_safe).
  list_iter: list_for
           | list_if
  list_for: "for" exprlist "in" testlist_safe [list_iter]
  list_if: "if" old_test [list_iter]

  // Generator / set / dict comprehension clauses (the iterable is an or_test).
  comp_iter: comp_for
           | comp_if
  comp_for: "for" exprlist "in" or_test [comp_iter]
  comp_if: "if" old_test [comp_iter]

  // Comma-separated expressions with no trailing comma (used inside backquotes).
  testlist1: test ("," test)*

  yield_expr: "yield" [testlist]
  // Any numeric literal token. `atom` refers to this rule.
  number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT | IMAG_NUMBER

  // A single-line or triple-quoted string literal token.
  string: STRING | LONG_STRING
  128. // Tokens
  // "#" up to (not including) the end of the line.
  COMMENT: /#[^\n]*/

  // One or more line breaks (each with its following tabs/spaces) and/or comments.
  _NEWLINE: (/\r?\n[\t ]*/ | COMMENT)+

  // Skipped input: horizontal whitespace, backslash line continuations, comments.
  %ignore /[\t \f]+/  // WS
  %ignore /\\[\t \f]*\r?\n/  // LINE_CONT
  %ignore COMMENT
  // Single- or double-quoted string with an optional u/b/f prefix and an
  // optional "r", matched case-insensitively. The lookahead rejects an opening
  // quote that is followed by two more of the same quote character; the tail
  // handles backslash runs before the closing quote.
  STRING: /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i

  // Triple-quoted string, declared with terminal priority 2. The "s" flag lets
  // "." match newlines, so the literal may span lines.
  LONG_STRING.2: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
  // Integer literals; all are case-insensitive and accept an optional "l" suffix.
  // Decimal: a non-zero digit followed by any digits.
  DEC_NUMBER: /[1-9]\d*l?/i
  // Hexadecimal: "0x" followed by at least one hex digit (a bare "0x" is not a number).
  HEX_NUMBER: /0x[\da-f]+l?/i
  // Octal: "0", an optional "o", then octal digits; this is also what matches a plain "0".
  OCT_NUMBER: /0o?[0-7]*l?/i

  // Terminals taken from Lark's common library; INT is imported under the
  // name _INT, CNAME under the name NAME.
  %import common.FLOAT -> FLOAT
  %import common.INT -> _INT
  %import common.CNAME -> NAME

  // Imaginary literal: an integer or float immediately followed by "j" or "J".
  IMAG_NUMBER: (_INT | FLOAT) ("j" | "J")
  // Block delimiters used by `suite`.
  // NOTE(review): presumably emitted by an indentation-tracking post-lexer
  // rather than matched in source text; confirm against the parser setup.
  _INDENT: "<INDENT>"
  _DEDENT: "<DEDENT>"