Browse Source

BUGFIX - Fixed 2 issues with line counting

1) Failed to detect newlines in regexps of the form [^...]

2) Last token didn't get end_line & end_column
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.4
Erez Shinan 6 years ago
parent
commit
181f061091
2 changed files with 24 additions and 1 deletions
  1. +11
    -1
      lark/lexer.py
  2. +13
    -0
      tests/test_parser.py

+ 11
- 1
lark/lexer.py View File

@@ -105,6 +105,10 @@ class _Lex:
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state) raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
break break


if t:
t.end_line = line_ctr.line
t.end_column = line_ctr.column

class UnlessCallback: class UnlessCallback:
def __init__(self, mres): def __init__(self, mres):
self.mres = mres self.mres = mres
@@ -164,7 +168,13 @@ def build_mres(tokens, match_whole=False):
return _build_mres(tokens, len(tokens), match_whole) return _build_mres(tokens, len(tokens), match_whole)


def _regexp_has_newline(r): def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or ('(?s' in r and '.' in r)
"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
- escaped newline (\n)
- anything but ([^...])
- any-char (.) when the flag (?s) exists
"""
return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r)


class Lexer: class Lexer:
"""Lexer interface """Lexer interface


+ 13
- 0
tests/test_parser.py View File

@@ -1224,6 +1224,19 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(t.children, ['a', 'bc']) self.assertEqual(t.children, ['a', 'bc'])
self.assertEqual(t.children[0].type, 'A') self.assertEqual(t.children[0].type, 'A')


def test_line_counting(self):
    # The pattern [^x]+ has no explicit newline in it, yet it matches the
    # whole two-line input as one token; verify the positions recorded on
    # that token.
    source = 'hello\nworld'
    parser = _Lark("start: /[^x]+/")

    tree = parser.parse(source)
    token = tree.children[0]

    # The single token is the entire input and starts at the very beginning.
    self.assertEqual(token, source)
    self.assertEqual(token.line, 1)
    self.assertEqual(token.column, 1)

    # End positions are not checked when running with the 'dynamic' lexer.
    if _LEXER == 'dynamic':
        return

    # 'world' sits on line 2, so the token ends just past its 5th character.
    self.assertEqual(token.end_line, 2)
    self.assertEqual(token.end_column, 6)





_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


Loading…
Cancel
Save