
Small fixes

Erez Sh committed 4 years ago
commit b2f1b3bf7c
7 changed files with 451 additions and 227 deletions
1. examples/standalone/json_parser.py  +415 -193
2. lark/lark.py  +0 -2
3. lark/lexer.py  +1 -1
4. lark/load_grammar.py  +5 -30
5. lark/parsers/lalr_analysis.py  +1 -1
6. lark/tools/standalone.py  +3 -0
7. lark/utils.py  +26 -0

examples/standalone/json_parser.py  +415 -193
File diff suppressed because it is too large.


lark/lark.py  +0 -2

@@ -1,8 +1,6 @@
 from __future__ import absolute_import
 
 import os
-import time
-from collections import defaultdict
 from io import open
 
 from .utils import STRING_TYPE, Serialize, SerializeMemoizer


lark/lexer.py  +1 -1

@@ -303,7 +303,7 @@ class TraditionalLexer(Lexer):
         for t in terminals:
             try:
                 re.compile(t.pattern.to_regexp())
-            except:
+            except re.error:
                 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
 
             if t.pattern.min_width == 0:
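
For reference, re.error is what re.compile raises for an invalid pattern, so the narrowed clause still turns a bad terminal regexp into a LexError while no longer swallowing unrelated exceptions. A minimal sketch of the failure mode (the pattern below is made up for illustration):

import re

try:
    re.compile('[unclosed')   # invalid regexp: unterminated character set
except re.error as exc:
    # This is the case the except clause above now targets; lark re-raises
    # it as LexError("Cannot compile token ...").
    print('bad pattern:', exc)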


lark/load_grammar.py  +5 -30

@@ -2,11 +2,10 @@

 import os.path
 import sys
-from ast import literal_eval
 from copy import copy, deepcopy
 from io import open
 
-from .utils import bfs
+from .utils import bfs, eval_escaping
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
 
 from .parse_tree_builder import ParseTreeBuilder
@@ -346,31 +345,6 @@ def _rfind(s, choices):



-def _fix_escaping(s):
-    w = ''
-    i = iter(s)
-    for n in i:
-        w += n
-        if n == '\\':
-            try:
-                n2 = next(i)
-            except StopIteration:
-                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
-            if n2 == '\\':
-                w += '\\\\'
-            elif n2 not in 'uxnftr':
-                w += '\\'
-            w += n2
-    w = w.replace('\\"', '"').replace("'", "\\'")
-
-    to_eval = "u'''%s'''" % w
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(s, e)
-
-    return s
 
 
 def _literal_to_pattern(literal):
     v = literal.value
@@ -383,7 +357,7 @@ def _literal_to_pattern(literal):
     assert v[0] == v[-1] and v[0] in '"/'
     x = v[1:-1]
 
-    s = _fix_escaping(x)
+    s = eval_escaping(x)
 
     if literal.type == 'STRING':
         s = s.replace('\\\\', '\\')
@@ -401,7 +375,7 @@ class PrepareLiterals(Transformer_InPlace):
         assert start.type == end.type == 'STRING'
         start = start.value[1:-1]
         end = end.value[1:-1]
-        assert len(_fix_escaping(start)) == len(_fix_escaping(end)) == 1, (start, end, len(_fix_escaping(start)), len(_fix_escaping(end)))
+        assert len(eval_escaping(start)) == len(eval_escaping(end)) == 1, (start, end, len(eval_escaping(start)), len(eval_escaping(end)))
         regexp = '[%s-%s]' % (start, end)
         return ST('pattern', [PatternRE(regexp)])

@@ -543,7 +517,8 @@ class Grammar:
         for dups in duplicates.values():
             if len(dups) > 1:
                 if dups[0].expansion:
-                    raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups))
+                    raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)"
+                                       % ''.join('\n * %s' % i for i in dups))
 
                 # Empty rule; assert all other attributes are equal
                 assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)
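
For intuition, the "colliding expansion of optionals" mentioned in the message comes from grammars where expanding ? or [] produces an alternative that already exists. A hypothetical sketch (not part of this commit) that should trip the duplicate check:

from lark import Lark

# "b"? expands into two alternatives of start: one with "b" and one without.
# The explicit second alternative duplicates the "without" expansion, so
# loading the grammar is expected to raise the GrammarError shown above.
grammar = '''
start: "a" "b"?
     | "a"
'''
try:
    Lark(grammar)
except Exception as exc:   # GrammarError
    print(exc)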


lark/parsers/lalr_analysis.py  +1 -1

@@ -202,7 +202,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                     continue
                 s2 = rp2.next
                 # if s2 is a terminal
-                if not s2 in self.lr0_rules_by_origin:
+                if s2 not in self.lr0_rules_by_origin:
                     dr.add(s2)
                 if s2 in self.NULLABLE:
                     r.add((next_state, s2))


lark/tools/standalone.py  +3 -0

@@ -34,6 +34,9 @@
 # See <http://www.gnu.org/licenses/>.
 #
 #
+
+import os
+from io import open
 ###}
 
 import pprint


lark/utils.py  +26 -0

@@ -1,4 +1,5 @@
 import sys
+from ast import literal_eval
 from collections import deque
 
 class fzset(frozenset):
@@ -239,3 +240,28 @@ class Enumerator(Serialize):
         assert len(r) == len(self.enums)
         return r
 
+
+def eval_escaping(s):
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            try:
+                n2 = next(i)
+            except StopIteration:
+                raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'uxnftr':
+                w += '\\'
+            w += n2
+    w = w.replace('\\"', '"').replace("'", "\\'")
+
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise ValueError(s, e)
+
+    return s
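
As a quick sanity check of the relocated helper, a small usage sketch (assuming this revision of lark is importable; the sample literals are illustrative):

from lark.utils import eval_escaping

assert eval_escaping(r'a\tb') == 'a\tb'        # \t resolves to a real tab
assert eval_escaping(r'\x41\u0042') == 'AB'    # hex and unicode escapes
assert len(eval_escaping(r'\n')) == 1          # the length check used in PrepareLiterals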
