From e0889a3cf3cd4de596647eee6859eb6667cfc422 Mon Sep 17 00:00:00 2001
From: MegaIng <cornelius@krupp.hamburg>
Date: Sun, 29 Aug 2021 23:06:01 +0200
Subject: [PATCH] Sort Options inside a TerminalTree

---
 lark/load_grammar.py  | 4 ++++
 tests/test_grammar.py | 4 +---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 4a9360c..abcfce1 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -614,6 +614,10 @@ class TerminalTreeToPattern(Transformer_NonRecursive):
         if len(exps) == 1:
             return exps[0]
 
+        # Sort the options longest-first so that the longest possible option is the one matched.
+        # (Python's re tries alternatives left to right: given (l|ll) it matches just 'l' even when 'll' could match)
+        exps.sort(key=lambda x: (-x.max_width, -x.min_width, -len(x.value)))
+
         pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
         return _make_joined_pattern(pattern, {i.flags for i in exps})
 
diff --git a/tests/test_grammar.py b/tests/test_grammar.py
index 319d709..c771f2b 100644
--- a/tests/test_grammar.py
+++ b/tests/test_grammar.py
@@ -247,10 +247,8 @@ class TestGrammar(TestCase):
         self.assertRaises(UnexpectedInput, l.parse, u'A' * 8192)
 
     def test_large_terminal(self):
-        # TODO: The `reversed` below is required because otherwise the regex engine is happy
-        #       with just parsing 9 from the string 999 instead of consuming the longest
         g = "start: NUMBERS\n"
-        g += "NUMBERS: " + '|'.join('"%s"' % i for i in reversed(range(0, 1000)))
+        g += "NUMBERS: " + '|'.join('"%s"' % i for i in range(0, 1000))
 
         l = Lark(g, parser='lalr')
         for i in (0, 9, 99, 999):