Browse Source

Support 32-bit Unicode escape sequences

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Aaron Tsang 3 years ago
parent
commit
11eafa3e9e
2 changed files with 16 additions and 1 deletions
  1. +1
    -1
      lark/load_grammar.py
  2. +15
    -0
      tests/test_parser.py

+ 1
- 1
lark/load_grammar.py View File

@@ -414,7 +414,7 @@ def eval_escaping(s):
raise GrammarError("Literal ended unexpectedly (bad escaping): `%r`" % s)
if n2 == '\\':
w += '\\\\'
elif n2 not in 'uxnftr':
elif n2 not in 'Uuxnftr':
w += '\\'
w += n2
w = w.replace('\\"', '"').replace("'", "\\'")


+ 15
- 0
tests/test_parser.py View File

@@ -1074,6 +1074,14 @@ def _make_parser_test(LEXER, PARSER):
""")
g.parse(u'\xa3\u0101\u00a3\u0203\n')

def test_unicode4(self):
    """Parse text against terminals written with 8-digit unicode escapes.

    UNIB is a string literal and UNIC is a regexp, so both literal kinds
    are exercised with a 32-bit escape. The parse result is discarded;
    the test makes no explicit assertion.
    """
    # NOTE(review): _Lark comes from the enclosing scope, which is not
    # visible in this hunk.
    grammar = r"""start: UNIA UNIB UNIA UNIC
UNIA: /\xa3/
UNIB: "\U0010FFFF"
UNIC: /\U00100000/ /\n/
"""
    parser = _Lark(grammar)
    parser.parse(u'\xa3\U0010FFFF\u00a3\U00100000\n')

def test_hex_escape(self):
g = _Lark(r"""start: A B C
A: "\x01"
@@ -1088,6 +1096,13 @@ def _make_parser_test(LEXER, PARSER):
""")
g.parse('abc')

@unittest.skipIf(sys.version_info < (3, 3), "re package did not support 32bit unicode escape sequence before Python 3.3")
def test_unicode_literal_range_escape2(self):
    """Parse four code points against a literal range with 8-digit escape bounds.

    The range runs from U+FFFF to U+10002 and the input holds U+FFFF,
    U+10000, U+10001 and U+10002. The parse result is discarded; the
    test makes no explicit assertion.
    """
    # NOTE(review): _Lark comes from the enclosing scope, which is not
    # visible in this hunk.
    grammar = r"""start: A+
A: "\U0000FFFF".."\U00010002"
"""
    parser = _Lark(grammar)
    parser.parse('\U0000FFFF\U00010000\U00010001\U00010002')

def test_hex_literal_range_escape(self):
g = _Lark(r"""start: A+
A: "\x01".."\x03"


Loading…
Cancel
Save