From 34cd792ffc1f96d144a599c14070e8f27262f645 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 27 May 2018 00:54:32 +0300 Subject: [PATCH] Fixed Python grammars, and a bug in newline detection --- examples/python2.g | 11 +++++------ examples/python3.g | 12 +++++------- examples/python_parser.py | 6 +++--- lark/lexer.py | 2 +- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/examples/python2.g b/examples/python2.g index 986350d..b0d5e14 100644 --- a/examples/python2.g +++ b/examples/python2.g @@ -149,10 +149,6 @@ string: STRING | LONG_STRING COMMENT: /#[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ -%ignore /[\t \f]+/ // WS -%ignore /\\[\t \f]*\r?\n/ // LINE_CONT -%ignore COMMENT - STRING : /[ubf]?r?("(?!"").*?(? NAME IMAG_NUMBER: (_INT | FLOAT) ("j"|"J") -_DEDENT: "" -_INDENT: "" + +%ignore /[\t \f]+/ // WS +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT +%ignore COMMENT +%declare _INDENT _DEDENT diff --git a/examples/python3.g b/examples/python3.g index 1c01e75..398e1ee 100644 --- a/examples/python3.g +++ b/examples/python3.g @@ -127,7 +127,7 @@ AWAIT: "await" | "True" -> const_true | "False" -> const_false -?testlist_comp: (test|star_expr) ( comp_for | ("," (test|star_expr))+ [","] | ",") +?testlist_comp: (test|star_expr) [comp_for | ("," (test|star_expr))+ [","] | ","] subscriptlist: subscript ("," subscript)* [","] subscript: test | [test] ":" [test] [sliceop] sliceop: ":" [test] @@ -170,10 +170,6 @@ COMMENT: /#[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ -%ignore /[\t \f]+/ // WS -%ignore /\\[\t \f]*\r?\n/ // LINE_CONT -%ignore COMMENT - STRING : /[ubf]?r?("(?!"").*?(?" -_INDENT: "" +%ignore /[\t \f]+/ // WS +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT +%ignore COMMENT +%declare _INDENT _DEDENT diff --git a/examples/python_parser.py b/examples/python_parser.py index ddbd5c4..0f9f30b 100644 --- a/examples/python_parser.py +++ b/examples/python_parser.py @@ -14,8 +14,8 @@ from lark.indenter import Indenter class PythonIndenter(Indenter): NL_type = '_NEWLINE' - OPEN_PAREN_types = ['__LPAR', '__LSQB', '__LBRACE'] - CLOSE_PAREN_types = ['__RPAR', '__RSQB', '__RBRACE'] + OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] + CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 8 @@ -78,6 +78,6 @@ def test_earley_equals_lalr(): if __name__ == '__main__': test_python_lib() - test_earley_equals_lalr() + # test_earley_equals_lalr() # python_parser3.parse(_read(sys.argv[1]) + '\n') diff --git a/lark/lexer.py b/lark/lexer.py index 6502535..4f668f6 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -178,7 +178,7 @@ def build_mres(tokens, match_whole=False): return _build_mres(tokens, len(tokens), match_whole) def _regexp_has_newline(r): - return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r) + return '\n' in r or '\\n' in r or ('(?s' in r and '.' in r) class Lexer: def __init__(self, tokens, ignore=(), user_callbacks={}):