Browse Source

Merge branch 'fix_recursion_error_terminal' of https://github.com/MegaIng/lark into MegaIng-fix_recursion_error_terminal

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Sh 3 years ago
parent
commit
5dc44c31e1
7 changed files with 38 additions and 19 deletions
  1. +3
    -0
      lark-stubs/tree.pyi
  2. +1
    -1
      lark/exceptions.py
  3. +1
    -1
      lark/lexer.py
  4. +11
    -15
      lark/load_grammar.py
  5. +1
    -2
      lark/parse_tree_builder.py
  6. +9
    -0
      lark/tree.py
  7. +12
    -0
      tests/test_grammar.py

+ 3
- 0
lark-stubs/tree.pyi View File

@@ -40,6 +40,9 @@ class Tree:
def expand_kids_by_index(self, *indices: int) -> None: def expand_kids_by_index(self, *indices: int) -> None:
... ...


def expand_kids_by_data(self, *data_values: str) -> bool:
    """Expand (inline) children whose data is one of ``data_values``; True if anything changed."""
    ...

def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
... ...




+ 1
- 1
lark/exceptions.py View File

@@ -210,7 +210,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
# TODO considered_rules and expected can be figured out using state # TODO considered_rules and expected can be figured out using state
self.line = getattr(token, 'line', '?') self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?') self.column = getattr(token, 'column', '?')
self.pos_in_stream = getattr(token, 'pos_in_stream', None)
self.pos_in_stream = getattr(token, 'start_pos', None)
self.state = state self.state = state


self.token = token self.token = token


+ 1
- 1
lark/lexer.py View File

@@ -150,7 +150,7 @@ class Token(Str):


@property @property
def pos_in_stream(self): def pos_in_stream(self):
warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning)
warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2)
return self.start_pos return self.start_pos


def update(self, type_=None, value=None): def update(self, type_=None, value=None):


+ 11
- 15
lark/load_grammar.py View File

@@ -91,6 +91,7 @@ TERMINALS = {
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
'_NL': r'(\r?\n)+\s*', '_NL': r'(\r?\n)+\s*',
'_NL_OR': r'(\r?\n)+\s*\|',
'WS': r'[ \t]+', 'WS': r'[ \t]+',
'COMMENT': r'\s*//[^\n]*', 'COMMENT': r'\s*//[^\n]*',
'_TO': '->', '_TO': '->',
@@ -113,9 +114,10 @@ RULES = {
''], ''],
'_template_params': ['RULE', '_template_params': ['RULE',
'_template_params _COMMA RULE'], '_template_params _COMMA RULE'],
'expansions': ['alias',
'expansions _OR alias',
'expansions _NL _OR alias'],
'expansions': ['_expansions'],
'_expansions': ['alias',
'_expansions _OR alias',
'_expansions _NL_OR alias'],


'?alias': ['expansion _TO RULE', 'expansion'], '?alias': ['expansion _TO RULE', 'expansion'],
'expansion': ['_expansion'], 'expansion': ['_expansion'],
@@ -357,11 +359,8 @@ class SimplifyRule_Visitor(Visitor):
@staticmethod @staticmethod
def _flatten(tree): def _flatten(tree):
while True: while True:
to_expand = [i for i, child in enumerate(tree.children)
if isinstance(child, Tree) and child.data == tree.data]
if not to_expand:
if not tree.expand_kids_by_data(tree.data):
break break
tree.expand_kids_by_index(*to_expand)


def expansion(self, tree): def expansion(self, tree):
# rules_list unpacking # rules_list unpacking
@@ -599,8 +598,7 @@ def _make_joined_pattern(regexp, flags_set):


return PatternRE(regexp, flags) return PatternRE(regexp, flags)



class TerminalTreeToPattern(Transformer):
class TerminalTreeToPattern(Transformer_NonRecursive):
def pattern(self, ps): def pattern(self, ps):
p ,= ps p ,= ps
return p return p
@@ -670,8 +668,8 @@ class Grammar:
def compile(self, start, terminals_to_keep): def compile(self, start, terminals_to_keep):
# We change the trees in-place (to support huge grammars) # We change the trees in-place (to support huge grammars)
# So deepcopy allows calling compile more than once. # So deepcopy allows calling compile more than once.
term_defs = deepcopy(list(self.term_defs))
rule_defs = [(n,p,nr_deepcopy_tree(t),o) for n,p,t,o in self.rule_defs]
term_defs = [(n, (nr_deepcopy_tree(t), p)) for n, (t, p) in self.term_defs]
rule_defs = [(n, p, nr_deepcopy_tree(t), o) for n, p, t, o in self.rule_defs]


# =================== # ===================
# Compile Terminals # Compile Terminals
@@ -919,7 +917,7 @@ def _get_parser():
parser_conf = ParserConf(rules, callback, ['start']) parser_conf = ParserConf(rules, callback, ['start'])
lexer_conf.lexer_type = 'standard' lexer_conf.lexer_type = 'standard'
parser_conf.parser_type = 'lalr' parser_conf.parser_type = 'lalr'
_get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, {})
_get_parser.cache = ParsingFrontend(lexer_conf, parser_conf, None)
return _get_parser.cache return _get_parser.cache


GRAMMAR_ERRORS = [ GRAMMAR_ERRORS = [
@@ -1096,9 +1094,7 @@ class GrammarBuilder:
# TODO: think about what to do with 'options' # TODO: think about what to do with 'options'
base = self._definitions[name][1] base = self._definitions[name][1]


while len(base.children) == 2:
assert isinstance(base.children[0], Tree) and base.children[0].data == 'expansions', base
base = base.children[0]
assert isinstance(base, Tree) and base.data == 'expansions'
base.children.insert(0, exp) base.children.insert(0, exp)


def _ignore(self, exp_or_name): def _ignore(self, exp_or_name):


+ 1
- 2
lark/parse_tree_builder.py View File

@@ -204,8 +204,7 @@ class AmbiguousExpander:
if i in self.to_expand: if i in self.to_expand:
ambiguous.append(i) ambiguous.append(i)


to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)]
child.expand_kids_by_index(*to_expand)
child.expand_kids_by_data('_ambig')


if not ambiguous: if not ambiguous:
return self.node_builder(children) return self.node_builder(children)


+ 9
- 0
lark/tree.py View File

@@ -107,6 +107,15 @@ class Tree(object):
kid = self.children[i] kid = self.children[i]
self.children[i:i+1] = kid.children self.children[i:i+1] = kid.children


def expand_kids_by_data(self, *data_values):
    """Inline every child subtree whose ``data`` is one of ``data_values``.

    Each matching child is replaced, in place, by its own children
    (delegating to ``expand_kids_by_index``).

    Returns:
        True if at least one child was expanded, False if nothing matched.
    """
    matching = []
    for pos, kid in enumerate(self.children):
        # Only subtrees are candidates; non-Tree children are never expanded.
        if isinstance(kid, Tree) and kid.data in data_values:
            matching.append(pos)

    if not matching:
        return False

    self.expand_kids_by_index(*matching)
    return True

def scan_values(self, pred): def scan_values(self, pred):
"""Return all values in the tree that evaluate pred(value) as true. """Return all values in the tree that evaluate pred(value) as true.




+ 12
- 0
tests/test_grammar.py View File

@@ -246,6 +246,18 @@ class TestGrammar(TestCase):
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190) self.assertRaises(UnexpectedInput, l.parse, u'A' * 8190)
self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192) self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192)


def test_large_terminal(self):
    """A terminal built from 1000 string alternatives must load and match correctly."""
    # TODO: The `reversed` below is required because otherwise the regex engine is
    #       happy to match just "9" from the string "999" instead of consuming the
    #       longest alternative.
    alternatives = '|'.join('"%s"' % num for num in reversed(range(0, 1000)))
    g = "start: NUMBERS\n" + "NUMBERS: " + alternatives

    l = Lark(g, parser='lalr')

    # Values inside the 0..999 range parse to a single-token tree.
    for text in ('0', '9', '99', '999'):
        self.assertEqual(l.parse(text), Tree('start', [text]))

    # Values outside the range are rejected.
    for text in ('-1', '1000'):
        self.assertRaises(UnexpectedInput, l.parse, text)



if __name__ == '__main__': if __name__ == '__main__':
main() main()


Loading…
Cancel
Save