Browse Source

Fixed Python grammars, and a bug in newline detection

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
34cd792ffc
4 changed files with 14 additions and 17 deletions
  1. +5
    -6
      examples/python2.g
  2. +5
    -7
      examples/python3.g
  3. +3
    -3
      examples/python_parser.py
  4. +1
    -1
      lark/lexer.py

+ 5
- 6
examples/python2.g View File

@@ -149,10 +149,6 @@ string: STRING | LONG_STRING
COMMENT: /#[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

-%ignore /[\t \f]+/ // WS
-%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
-%ignore COMMENT

STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
LONG_STRING.2: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is

@@ -164,6 +160,9 @@ OCT_NUMBER: /0o?[0-7]*l?/i
%import common.CNAME -> NAME
IMAG_NUMBER: (_INT | FLOAT) ("j"|"J")

-_DEDENT: "<DEDENT>"
-_INDENT: "<INDENT>"

+%ignore /[\t \f]+/ // WS
+%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
+%ignore COMMENT
+%declare _INDENT _DEDENT


+ 5
- 7
examples/python3.g View File

@@ -127,7 +127,7 @@ AWAIT: "await"
| "True" -> const_true
| "False" -> const_false

-?testlist_comp: (test|star_expr) ( comp_for | ("," (test|star_expr))+ [","] | ",")
+?testlist_comp: (test|star_expr) [comp_for | ("," (test|star_expr))+ [","] | ","]
subscriptlist: subscript ("," subscript)* [","]
subscript: test | [test] ":" [test] [sliceop]
sliceop: ":" [test]
@@ -170,10 +170,6 @@ COMMENT: /#[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+


-%ignore /[\t \f]+/ // WS
-%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
-%ignore COMMENT

STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is

@@ -184,6 +180,8 @@ BIN_NUMBER.2 : /0b[0-1]*/i
FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER.2: /\d+j|${FLOAT_NUMBER}j/i

-_DEDENT: "<DEDENT>"
-_INDENT: "<INDENT>"
+%ignore /[\t \f]+/ // WS
+%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
+%ignore COMMENT
+%declare _INDENT _DEDENT


+ 3
- 3
examples/python_parser.py View File

@@ -14,8 +14,8 @@ from lark.indenter import Indenter

class PythonIndenter(Indenter):
NL_type = '_NEWLINE'
-OPEN_PAREN_types = ['__LPAR', '__LSQB', '__LBRACE']
-CLOSE_PAREN_types = ['__RPAR', '__RSQB', '__RBRACE']
+OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
+CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
INDENT_type = '_INDENT'
DEDENT_type = '_DEDENT'
tab_len = 8
@@ -78,6 +78,6 @@ def test_earley_equals_lalr():

if __name__ == '__main__':
test_python_lib()
-test_earley_equals_lalr()
+# test_earley_equals_lalr()
# python_parser3.parse(_read(sys.argv[1]) + '\n')


+ 1
- 1
lark/lexer.py View File

@@ -178,7 +178,7 @@ def build_mres(tokens, match_whole=False):
return _build_mres(tokens, len(tokens), match_whole)

def _regexp_has_newline(r):
-return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)
+return '\n' in r or '\\n' in r or ('(?s' in r and '.' in r)

class Lexer:
def __init__(self, tokens, ignore=(), user_callbacks={}):


Loading…
Cancel
Save