From 8757e39397fd3bf4138711a14005756ff1578df3 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 21 Sep 2021 14:31:17 +0100 Subject: [PATCH] Moved Python3 grammar to stdlib as python.lark --- examples/advanced/python3.lark | 225 ----------------------------- examples/advanced/python_parser.py | 12 +- examples/lark_grammar.py | 1 - lark/grammars/python.lark | 222 ++++++++++++++++++++++++++++ lark/load_grammar.py | 12 +- 5 files changed, 237 insertions(+), 235 deletions(-) delete mode 100644 examples/advanced/python3.lark diff --git a/examples/advanced/python3.lark b/examples/advanced/python3.lark deleted file mode 100644 index cb3b077..0000000 --- a/examples/advanced/python3.lark +++ /dev/null @@ -1,225 +0,0 @@ -// Python 3 grammar for Lark - -// This grammar should parse all python 3.x code successfully. - -// Adapted from: https://docs.python.org/3/reference/grammar.html -// Adapted by: Erez Shinan - -// Start symbols for the grammar: -// single_input is a single interactive statement; -// file_input is a module or sequence of commands read from an input file; -// eval_input is the input for the eval() functions. -// NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE -file_input: (_NEWLINE | stmt)* -eval_input: testlist _NEWLINE* - -decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -async_funcdef: "async" funcdef -funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite - -parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]] - | starparams - | kwparams - -SLASH: "/" // Otherwise the it will completely disappear and it will be undisguisable in the result -starparams: "*" typedparam? ("," paramvalue)* ["," kwparams] -kwparams: "**" typedparam ","? - -?paramvalue: typedparam ("=" test)? -?typedparam: NAME (":" test)? 
- - -lambdef: "lambda" [lambda_params] ":" test -lambdef_nocond: "lambda" [lambda_params] ":" test_nocond -lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] - | lambda_starparams - | lambda_kwparams -?lambda_paramvalue: NAME ("=" test)? -lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]] -lambda_kwparams: "**" NAME ","? - - -?stmt: simple_stmt | compound_stmt -?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE -?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr -assign_stmt: annassign | augassign | assign - -annassign: testlist_star_expr ":" test ["=" test] -assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ -augassign: testlist_star_expr augassign_op (yield_expr|testlist) -!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" -?testlist_star_expr: test_or_star_expr - | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple - | test_or_star_expr "," -> tuple - -// For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: "del" exprlist -pass_stmt: "pass" -?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: "break" -continue_stmt: "continue" -return_stmt: "return" [testlist] -yield_stmt: yield_expr -raise_stmt: "raise" [test ["from" test]] -import_stmt: import_name | import_from -import_name: "import" dotted_as_names -// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS -import_from: "from" (dots? 
dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) -!dots: "."+ -import_as_name: NAME ["as" NAME] -dotted_as_name: dotted_name ["as" NAME] -import_as_names: import_as_name ("," import_as_name)* [","] -dotted_as_names: dotted_as_name ("," dotted_as_name)* -dotted_name: NAME ("." NAME)* -global_stmt: "global" NAME ("," NAME)* -nonlocal_stmt: "nonlocal" NAME ("," NAME)* -assert_stmt: "assert" test ["," test] - -?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: "async" (funcdef | with_stmt | for_stmt) -if_stmt: "if" test ":" suite elifs ["else" ":" suite] -elifs: elif_* -elif_: "elif" test ":" suite -while_stmt: "while" test ":" suite ["else" ":" suite] -for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] -try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] - | "try" ":" suite finally -> try_finally -finally: "finally" ":" suite -except_clauses: except_clause+ -except_clause: "except" [test ["as" NAME]] ":" suite - -with_stmt: "with" with_items ":" suite -with_items: with_item ("," with_item)* -with_item: test ["as" expr] -// NB compile.c makes sure that the default except clause is last -suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT - -?test: or_test ("if" or_test "else" test)? 
- | lambdef -?test_nocond: or_test | lambdef_nocond - -?or_test: and_test ("or" and_test)* -?and_test: not_test_ ("and" not_test_)* -?not_test_: "not" not_test_ -> not_test - | comparison -?comparison: expr (comp_op expr)* -star_expr: "*" expr - -?expr: or_expr -?or_expr: xor_expr ("|" xor_expr)* -?xor_expr: and_expr ("^" and_expr)* -?and_expr: shift_expr ("&" shift_expr)* -?shift_expr: arith_expr (_shift_op arith_expr)* -?arith_expr: term (_add_op term)* -?term: factor (_mul_op factor)* -?factor: _unary_op factor | power - -!_unary_op: "+"|"-"|"~" -!_add_op: "+"|"-" -!_shift_op: "<<"|">>" -!_mul_op: "*"|"@"|"/"|"%"|"//" -// <> isn't actually a valid comparison operator in Python. It's here for the -// sake of a __future__ import described in PEP 401 (which really works :-) -!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" - -?power: await_expr ("**" factor)? -?await_expr: AWAIT? atom_expr -AWAIT: "await" - -?atom_expr: atom_expr "(" [arguments] ")" -> funccall - | atom_expr "[" subscriptlist "]" -> getitem - | atom_expr "." NAME -> getattr - | atom - -?atom: "(" yield_expr ")" - | "(" _tuple_inner? ")" -> tuple - | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension - | "[" _testlist_comp? "]" -> list - | "[" comprehension{test_or_star_expr} "]" -> list_comprehension - | "{" _dict_exprlist? "}" -> dict - | "{" comprehension{key_value} "}" -> dict_comprehension - | "{" _set_exprlist "}" -> set - | "{" comprehension{test} "}" -> set_comprehension - | NAME -> var - | number - | string_concat - | "(" test ")" - | "..." 
-> ellipsis - | "None" -> const_none - | "True" -> const_true - | "False" -> const_false - - -?string_concat: string+ - -_testlist_comp: test | _tuple_inner -_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") - - -?test_or_star_expr: test - | star_expr - -?subscriptlist: subscript - | subscript (("," subscript)+ [","] | ",") -> subscript_tuple -?subscript: test | ([test] ":" [test] [sliceop]) -> slice -sliceop: ":" [test] -?exprlist: (expr|star_expr) - | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") -?testlist: test | testlist_tuple -testlist_tuple: test (("," test)+ [","] | ",") -_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] - -key_value: test ":" test - -_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] - -classdef: "class" NAME ["(" [arguments] ")"] ":" suite - - - -arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? - | starargs - | kwargs - | comprehension{test} - -starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] -stararg: "*" test -kwargs: "**" test - -?argvalue: test ("=" test)? 
- - -comprehension{comp_result}: comp_result comp_fors [comp_if] -comp_fors: comp_for+ -comp_for: [ASYNC] "for" exprlist "in" or_test -ASYNC: "async" -?comp_if: "if" test_nocond - -// not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: "yield" [testlist] - | "yield" "from" test -> yield_from - -number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER -string: STRING | LONG_STRING - -// Import terminals from standard library (grammars/python.lark) -%import python (NAME, COMMENT, STRING, LONG_STRING) -%import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER) - - -// Other terminals - -_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ - -%ignore /[\t \f]+/ // WS -%ignore /\\[\t \f]*\r?\n/ // LINE_CONT -%ignore COMMENT -%declare _INDENT _DEDENT - diff --git a/examples/advanced/python_parser.py b/examples/advanced/python_parser.py index b86fa01..40e8605 100644 --- a/examples/advanced/python_parser.py +++ b/examples/advanced/python_parser.py @@ -14,7 +14,6 @@ import glob, time from lark import Lark from lark.indenter import Indenter -# __path__ = os.path.dirname(__file__) class PythonIndenter(Indenter): NL_type = '_NEWLINE' @@ -24,11 +23,14 @@ class PythonIndenter(Indenter): DEDENT_type = '_DEDENT' tab_len = 8 -kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') +kwargs = dict(postlex=PythonIndenter(), start='file_input') -python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) -python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) -python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='basic', **kwargs) +# Official Python grammar by Lark +python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], parser='lalr', **kwargs) + +# Local Python2 grammar +python_parser2 = Lark.open('python2.lark', rel_to=__file__, parser='lalr', **kwargs) +python_parser2_earley = 
Lark.open('python2.lark', rel_to=__file__, parser='earley', lexer='basic', **kwargs) try: xrange diff --git a/examples/lark_grammar.py b/examples/lark_grammar.py index 6730db9..e23c5e3 100644 --- a/examples/lark_grammar.py +++ b/examples/lark_grammar.py @@ -15,7 +15,6 @@ parser = lark.Lark.open(lark_path / 'grammars/lark.lark', rel_to=__file__, parse grammar_files = [ examples_path / 'advanced/python2.lark', - examples_path / 'advanced/python3.lark', examples_path / 'relative-imports/multiples.lark', examples_path / 'relative-imports/multiple2.lark', examples_path / 'relative-imports/multiple3.lark', diff --git a/lark/grammars/python.lark b/lark/grammars/python.lark index e73362d..7ff8f67 100644 --- a/lark/grammars/python.lark +++ b/lark/grammars/python.lark @@ -1,3 +1,225 @@ +// Python 3 grammar for Lark + +// This grammar should parse all python 3.x code successfully. + +// Adapted from: https://docs.python.org/3/reference/grammar.html + +// Start symbols for the grammar: +// single_input is a single interactive statement; +// file_input is a module or sequence of commands read from an input file; +// eval_input is the input for the eval() functions. +// NB: compound_stmt in single_input is followed by extra NEWLINE! +// + +single_input: _NEWLINE | simple_stmt | compound_stmt _NEWLINE +file_input: (_NEWLINE | stmt)* +eval_input: testlist _NEWLINE* + +decorator: "@" dotted_name [ "(" [arguments] ")" ] _NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: "async" funcdef +funcdef: "def" NAME "(" [parameters] ")" ["->" test] ":" suite + +parameters: paramvalue ("," paramvalue)* ["," SLASH] ["," [starparams | kwparams]] + | starparams + | kwparams + +SLASH: "/" // Otherwise it will completely disappear and it will be indistinguishable in the result +starparams: "*" typedparam? ("," paramvalue)* ["," kwparams] +kwparams: "**" typedparam ","? + +?paramvalue: typedparam ("=" test)? +?typedparam: NAME (":" test)? 
+ + +lambdef: "lambda" [lambda_params] ":" test +lambdef_nocond: "lambda" [lambda_params] ":" test_nocond +lambda_params: lambda_paramvalue ("," lambda_paramvalue)* ["," [lambda_starparams | lambda_kwparams]] + | lambda_starparams + | lambda_kwparams +?lambda_paramvalue: NAME ("=" test)? +lambda_starparams: "*" [NAME] ("," lambda_paramvalue)* ["," [lambda_kwparams]] +lambda_kwparams: "**" NAME ","? + + +?stmt: simple_stmt | compound_stmt +?simple_stmt: small_stmt (";" small_stmt)* [";"] _NEWLINE +?small_stmt: (expr_stmt | assign_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr +assign_stmt: annassign | augassign | assign + +annassign: testlist_star_expr ":" test ["=" test] +assign: testlist_star_expr ("=" (yield_expr|testlist_star_expr))+ +augassign: testlist_star_expr augassign_op (yield_expr|testlist) +!augassign_op: "+=" | "-=" | "*=" | "@=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" +?testlist_star_expr: test_or_star_expr + | test_or_star_expr ("," test_or_star_expr)+ ","? -> tuple + | test_or_star_expr "," -> tuple + +// For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: "del" exprlist +pass_stmt: "pass" +?flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: "break" +continue_stmt: "continue" +return_stmt: "return" [testlist] +yield_stmt: yield_expr +raise_stmt: "raise" [test ["from" test]] +import_stmt: import_name | import_from +import_name: "import" dotted_as_names +// note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS +import_from: "from" (dots? 
dotted_name | dots) "import" ("*" | "(" import_as_names ")" | import_as_names) +!dots: "."+ +import_as_name: NAME ["as" NAME] +dotted_as_name: dotted_name ["as" NAME] +import_as_names: import_as_name ("," import_as_name)* [","] +dotted_as_names: dotted_as_name ("," dotted_as_name)* +dotted_name: NAME ("." NAME)* +global_stmt: "global" NAME ("," NAME)* +nonlocal_stmt: "nonlocal" NAME ("," NAME)* +assert_stmt: "assert" test ["," test] + +?compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: "async" (funcdef | with_stmt | for_stmt) +if_stmt: "if" test ":" suite elifs ["else" ":" suite] +elifs: elif_* +elif_: "elif" test ":" suite +while_stmt: "while" test ":" suite ["else" ":" suite] +for_stmt: "for" exprlist "in" testlist ":" suite ["else" ":" suite] +try_stmt: "try" ":" suite except_clauses ["else" ":" suite] [finally] + | "try" ":" suite finally -> try_finally +finally: "finally" ":" suite +except_clauses: except_clause+ +except_clause: "except" [test ["as" NAME]] ":" suite + +with_stmt: "with" with_items ":" suite +with_items: with_item ("," with_item)* +with_item: test ["as" expr] +// NB compile.c makes sure that the default except clause is last +suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT + +?test: or_test ("if" or_test "else" test)? 
+ | lambdef +?test_nocond: or_test | lambdef_nocond + +?or_test: and_test ("or" and_test)* +?and_test: not_test_ ("and" not_test_)* +?not_test_: "not" not_test_ -> not_test + | comparison +?comparison: expr (comp_op expr)* +star_expr: "*" expr + +?expr: or_expr +?or_expr: xor_expr ("|" xor_expr)* +?xor_expr: and_expr ("^" and_expr)* +?and_expr: shift_expr ("&" shift_expr)* +?shift_expr: arith_expr (_shift_op arith_expr)* +?arith_expr: term (_add_op term)* +?term: factor (_mul_op factor)* +?factor: _unary_op factor | power + +!_unary_op: "+"|"-"|"~" +!_add_op: "+"|"-" +!_shift_op: "<<"|">>" +!_mul_op: "*"|"@"|"/"|"%"|"//" +// <> isn't actually a valid comparison operator in Python. It's here for the +// sake of a __future__ import described in PEP 401 (which really works :-) +!comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" + +?power: await_expr ("**" factor)? +?await_expr: AWAIT? atom_expr +AWAIT: "await" + +?atom_expr: atom_expr "(" [arguments] ")" -> funccall + | atom_expr "[" subscriptlist "]" -> getitem + | atom_expr "." NAME -> getattr + | atom + +?atom: "(" yield_expr ")" + | "(" _tuple_inner? ")" -> tuple + | "(" comprehension{test_or_star_expr} ")" -> tuple_comprehension + | "[" _testlist_comp? "]" -> list + | "[" comprehension{test_or_star_expr} "]" -> list_comprehension + | "{" _dict_exprlist? "}" -> dict + | "{" comprehension{key_value} "}" -> dict_comprehension + | "{" _set_exprlist "}" -> set + | "{" comprehension{test} "}" -> set_comprehension + | NAME -> var + | number + | string_concat + | "(" test ")" + | "..." 
-> ellipsis + | "None" -> const_none + | "True" -> const_true + | "False" -> const_false + + +?string_concat: string+ + +_testlist_comp: test | _tuple_inner +_tuple_inner: test_or_star_expr (("," test_or_star_expr)+ [","] | ",") + + +?test_or_star_expr: test + | star_expr + +?subscriptlist: subscript + | subscript (("," subscript)+ [","] | ",") -> subscript_tuple +?subscript: test | ([test] ":" [test] [sliceop]) -> slice +sliceop: ":" [test] +?exprlist: (expr|star_expr) + | (expr|star_expr) (("," (expr|star_expr))+ [","]|",") +?testlist: test | testlist_tuple +testlist_tuple: test (("," test)+ [","] | ",") +_dict_exprlist: (key_value | "**" expr) ("," (key_value | "**" expr))* [","] + +key_value: test ":" test + +_set_exprlist: test_or_star_expr ("," test_or_star_expr)* [","] + +classdef: "class" NAME ["(" [arguments] ")"] ":" suite + + + +arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? + | starargs + | kwargs + | comprehension{test} + +starargs: stararg ("," stararg)* ("," argvalue)* ["," kwargs] +stararg: "*" test +kwargs: "**" test + +?argvalue: test ("=" test)? 
+ + +comprehension{comp_result}: comp_result comp_fors [comp_if] +comp_fors: comp_for+ +comp_for: [ASYNC] "for" exprlist "in" or_test +ASYNC: "async" +?comp_if: "if" test_nocond + +// not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: "yield" [testlist] + | "yield" "from" test -> yield_from + +number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER +string: STRING | LONG_STRING + +// Other terminals + +_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ + +%ignore /[\t \f]+/ // WS +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT +%ignore COMMENT +%declare _INDENT _DEDENT + + // Python terminals NAME: /[a-zA-Z_]\w*/ diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 3250cfc..a12c61a 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -820,15 +820,19 @@ class FromPackageLoader: # Technically false, but FileNotFound doesn't exist in python2.7, and this message should never reach the end user anyway raise IOError() to_try = [base_path.path] + + err = None for path in to_try: full_path = os.path.join(path, grammar_path) try: - text = pkgutil.get_data(self.pkg_name, full_path) - except IOError: + text: Optional[str] = pkgutil.get_data(self.pkg_name, full_path) + except IOError as e: + err = e continue else: - return PackageResource(self.pkg_name, full_path), text.decode() - raise IOError() + return PackageResource(self.pkg_name, full_path), (text.decode() if text else '') + + raise IOError('Cannot find grammar in given paths') from err stdlib_loader = FromPackageLoader('lark', IMPORT_PATHS)