This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

190 lines
6.9 KiB

  1. // Python 3 grammar for Lark
  2. // NOTE: Work in progress!!! (XXX TODO)
  3. // This grammar should parse all python 3.x code successfully,
  4. // but the resulting parse-tree is still not well-organized.
  5. // Adapted from: https://docs.python.org/3/reference/grammar.html
  6. // Adapted by: Erez Shinan
  7. // Start symbols for the grammar:
  8. // single_input is a single interactive statement;
  9. // file_input is a module or sequence of commands read from an input file;
  10. // eval_input is the input for the eval() functions.
  11. // NB: compound_stmt in single_input is followed by extra NEWLINE!
  12. single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE  // one interactive statement; a compound statement must be followed by an extra _NEWLINE
  13. file_input: (_NEWLINE | stmt)*  // a whole module: any mix of newline tokens and statements (may be empty)
  14. eval_input: testlist _NEWLINE*  // eval() input: an expression list followed by any number of newline tokens
  15. decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE  // one "@name" or "@name(args)" line, terminated by _NEWLINE
  16. decorators: decorator+  // one or more stacked decorator lines
  17. decorated: decorators (classdef | funcdef | async_funcdef)  // decorators may precede a class, a function or an async function
  18. async_funcdef: "async" funcdef
  19. funcdef: "def" NAME "(" parameters? ")" ["->" test] ":" suite  // the optional "->" test is the return annotation
  20. parameters: paramvalue ("," paramvalue)* ["," [ starparams | kwparams]]  // plain parameters first; the tail may be a lone trailing ",", a "*" section or a "**" parameter
  21. | starparams
  22. | kwparams
  23. starparams: "*" typedparam? ("," paramvalue)* ["," kwparams]  // typedparam is optional, so a bare "*" is accepted
  24. kwparams: "**" typedparam
  25. ?paramvalue: typedparam ["=" test]  // parameter with an optional default value
  26. ?typedparam: NAME [":" test]  // parameter name with an optional annotation
  27. varargslist: (vfpdef ["=" test] ("," vfpdef ["=" test])* ["," [ "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]] | "**" vfpdef [","]]]  // parameter list referenced by lambdef and lambdef_nocond; its names (vfpdef) take no annotation
  28. | "*" [vfpdef] ("," vfpdef ["=" test])* ["," ["**" vfpdef [","]]]
  29. | "**" vfpdef [","])
  30. vfpdef: NAME  // a bare parameter name
  31. ?stmt: simple_stmt | compound_stmt  // "?" rules are inlined into their parent when they match a single child
  32. ?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE  // one or more ";"-separated small statements on a single line
  33. ?small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
  34. ?expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist)  // expression followed by an annotation, an augmented assignment, ...
  35. | ("=" (yield_expr|testlist_star_expr))*)  // ... or zero or more chained "=" assignments (zero gives a bare expression statement)
  36. annassign: ":" test ["=" test]  // annotation with an optional assigned value
  37. ?testlist_star_expr: (test|star_expr) ("," (test|star_expr))* [","]
  38. !augassign: ("+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=")  // "!" keeps the operator token in the tree
  39. // For normal and annotated assignments, additional restrictions enforced by the interpreter
  40. del_stmt: "del" exprlist
  41. pass_stmt: "pass"
  42. ?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
  43. break_stmt: "break"
  44. continue_stmt: "continue"
  45. return_stmt: "return" [testlist]  // the returned expression list is optional
  46. yield_stmt: yield_expr  // a yield expression used as a statement
  47. raise_stmt: "raise" [test ["from" test]]  // bare "raise", "raise X" or "raise X from Y"
  48. import_stmt: import_name | import_from
  49. import_name: "import" dotted_as_names
  50. // NOTE(review): leading dots of a relative import are matched by the "dots" rule below ("."+); "..." is also a literal in atom (ellipsis) - confirm that "from ... import x" still lexes as dots
  51. import_from: "from" (dots? dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names)  // source is a module name with optional leading dots, or dots alone
  52. !dots: "."+  // "!" keeps the "." tokens in the tree
  53. import_as_name: NAME ["as" NAME]
  54. dotted_as_name: dotted_name ["as" NAME]
  55. import_as_names: import_as_name ("," import_as_name)* [","]  // a trailing comma is accepted
  56. dotted_as_names: dotted_as_name ("," dotted_as_name)*  // no trailing comma here
  57. dotted_name: NAME ("." NAME)*
  58. global_stmt: "global" NAME ("," NAME)*
  59. nonlocal_stmt: "nonlocal" NAME ("," NAME)*
  60. assert_stmt: "assert" test ["," test]  // condition with an optional second expression
  61. compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
  62. async_stmt: "async" (funcdef | with_stmt | for_stmt)  // "async" may prefix def, with and for
  63. if_stmt: "if" test ":" suite ("elif" test ":" suite)* ["else" ":" suite]
  64. while_stmt: "while" test ":" suite ["else" ":" suite]
  65. for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite]
  66. try_stmt: ("try" ":" suite ((except_clause ":" suite)+ ["else" ":" suite] ["finally" ":" suite] | "finally" ":" suite))  // needs at least one except clause, or a lone finally
  67. with_stmt: "with" with_item ("," with_item)* ":" suite
  68. with_item: test ["as" expr]
  69. // NB compile.c makes sure that the default except clause is last
  70. except_clause: "except" [test ["as" NAME]]  // bare "except", "except E" or "except E as name"
  71. suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT  // an inline statement, or an indented block bracketed by _INDENT/_DEDENT
  72. ?test: or_test ["if" or_test "else" test] | lambdef  // conditional expression ("a if c else b") or a lambda
  73. ?test_nocond: or_test | lambdef_nocond  // like test but without the conditional form; referenced by comp_if
  74. lambdef: "lambda" [varargslist] ":" test
  75. lambdef_nocond: "lambda" [varargslist] ":" test_nocond
  76. ?or_test: and_test ("or" and_test)*  // from here down, each rule is built from the next, tighter-binding one
  77. ?and_test: not_test ("and" not_test)*
  78. ?not_test: "not" not_test -> not  // aliased so the tree node is named "not"
  79. | comparison
  80. ?comparison: expr (_comp_op expr)*  // comparisons may be chained
  81. star_expr: "*" expr
  82. ?expr: xor_expr ("|" xor_expr)*
  83. ?xor_expr: and_expr ("^" and_expr)*
  84. ?and_expr: shift_expr ("&" shift_expr)*
  85. ?shift_expr: arith_expr (_shift_op arith_expr)*
  86. ?arith_expr: term (_add_op term)*
  87. ?term: factor (_mul_op factor)*
  88. ?factor: _factor_op factor | power  // unary prefix operators may be stacked
  89. !_factor_op: "+"|"-"|"~"  // "!" keeps the operator token; the "_" prefix inlines this rule's children into the parent
  90. !_add_op: "+"|"-"
  91. !_shift_op: "<<"|">>"
  92. !_mul_op: "*"|"@"|"/"|"%"|"//"
  93. // <> isn't actually a valid comparison operator in Python. It's here for the
  94. // sake of a __future__ import described in PEP 401 (which really works :-)
  95. !_comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"  // "not in" and "is not" are two-token operators
  96. ?power: await_expr ["**" factor]  // the right operand is a factor, so it may itself carry a unary operator
  97. ?await_expr: AWAIT? atom_expr
  98. AWAIT: "await"  // named terminal for the "await" keyword
  99. ?atom_expr: atom_expr "(" [arguments] ")" -> funccall  // left-recursive: call, subscript and attribute access chain from left to right
  100. | atom_expr "[" subscriptlist "]" -> getitem
  101. | atom_expr "." NAME -> getattr
  102. | atom
  103. ?atom: "(" [yield_expr|testlist_comp] ")" -> tuple  // "()", a parenthesised yield, a generator expression, or a tuple (testlist_comp needs a comma or comp_for)
  104. | "[" [testlist_comp | test | star_expr] "]" -> list  // lone test/star_expr accepted here: testlist_comp alone needs a comma or comp_for, which rejected one-element lists such as [x] and [*xs]
  105. | "{" [dictorsetmaker] "}" -> dict  // also covers set displays, see dictorsetmaker
  106. | NAME -> var
  107. | number | string+  // adjacent string literals form a single atom
  108. | "(" test ")"  // parenthesised single expression (no comma, so not a tuple)
  109. | "..." -> ellipsis
  110. | "None" -> const_none
  111. | "True" -> const_true
  112. | "False" -> const_false
  113. ?testlist_comp: (test|star_expr) ( comp_for | ("," (test|star_expr))+ [","] | ",")  // the trailing group is mandatory: a comprehension, further comma-separated items, or a lone trailing comma
  114. subscriptlist: subscript ("," subscript)* [","]
  115. subscript: test | [test] ":" [test] [sliceop]  // an index expression, or a slice whose start, stop and step are all optional
  116. sliceop: ":" [test]  // the step part of a slice
  117. exprlist: (expr|star_expr) ("," (expr|star_expr))* [","]
  118. testlist: test ("," test)* [","]
  119. dictorsetmaker: ( ((test ":" test | "**" expr) (comp_for | ("," (test ":" test | "**" expr))* [","])) | ((test | star_expr) (comp_for | ("," (test | star_expr))* [","])) )  // first branch: "key: value" / "**" items (dict); second branch: plain / "*" items (set); each may be a comprehension
  120. classdef: "class" NAME ["(" [arguments] ")"] ":" suite  // the parenthesised argument list is optional and may be empty
  121. arguments: argvalue ("," argvalue)* ["," [ starargs | kwargs]]  // plain/keyword arguments first; the tail may be a lone trailing ",", "*" arguments or a "**" argument
  122. | starargs
  123. | kwargs
  124. | test comp_for  // a bare generator expression as the only argument
  125. starargs: "*" test ("," "*" test)* ("," argvalue)* ["," kwargs]
  126. kwargs: "**" test
  127. ?argvalue: test ["=" test]  // positional argument, or "name=value" when the "=" part is present
  128. comp_iter: comp_for | comp_if | async_for  // the clause that may follow inside a comprehension
  129. async_for: "async" "for" exprlist "in" or_test [comp_iter]
  130. comp_for: "for" exprlist "in" or_test [comp_iter]
  131. comp_if: "if" test_nocond [comp_iter]
  132. // not used in grammar, but may appear in "node" passed from Parser to Compiler
  133. encoding_decl: NAME
  134. yield_expr: "yield" [yield_arg]
  135. yield_arg: "from" test | testlist  // "yield from X" or "yield a, b, ..."
  136. number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER  // BIN_NUMBER added: the terminal is defined below but was missing here, so binary literals such as 0b101 were never accepted
  137. string: STRING | LONG_STRING  // single-quoted/double-quoted or triple-quoted literal
  138. // Tokens
  139. NAME: /[a-zA-Z_]\w*/  // identifier: a letter or underscore, then word characters
  140. COMMENT: /#[^\n]*/  // from "#" up to, but not including, the newline
  141. _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+  // a line break plus the tabs/spaces that follow it, or a comment; consecutive ones merge into a single token
  142. %ignore /[\t \f]+/ // WS: tabs, spaces and form feeds between tokens
  143. %ignore /\\[\t \f]*\r?\n/ // LINE_CONT: a backslash, optional trailing whitespace, then the line break
  144. %ignore COMMENT
  145. STRING : /([ubf]?r?|r[bf])("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i  // prefix also accepts the "r first" orders rb/rf, which [ubf]?r? alone rejected; the body is a double- or single-quoted string ending at the first unescaped quote
  146. LONG_STRING: /([ubf]?r?|r[bf])(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is  // same prefixes; triple-quoted body, where the "s" flag lets "." span line breaks
  147. DEC_NUMBER: /0|[1-9]\d*/i  // a single 0, or a nonzero digit followed by more digits
  148. HEX_NUMBER.2: /0x[\da-f]+/i  // "+" instead of "*": at least one digit must follow the prefix, so a bare "0x" is no longer a number
  149. OCT_NUMBER.2: /0o[0-7]+/i  // same digit requirement for octal
  150. BIN_NUMBER.2 : /0b[0-1]+/i  // same digit requirement for binary
  151. FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i  // digits with a decimal point and optional exponent, or integer digits with a mandatory exponent
  152. IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i  // built from the FLOAT_NUMBER terminal: "${FLOAT_NUMBER}" inside a regexp is not interpolated, so float-based imaginary literals such as 1.5j never matched
  153. _DEDENT: "<DEDENT>"  // placeholder terminal closing an indented block in suite; presumably emitted by an indentation-tracking post-lexer rather than matched in source text - TODO confirm
  154. _INDENT: "<INDENT>"  // placeholder terminal opening an indented block in suite; same assumption as _DEDENT