Browse Source

Post-merge fixes for end_symbol, plus two more tests (Issue #237)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.7
Erez Sh 4 years ago
parent
commit
cf7479f186
4 changed files with 33 additions and 13 deletions
  1. +0
    -8
      lark/load_grammar.py
  2. +1
    -1
      lark/parsers/lalr_analysis.py
  3. +8
    -4
      lark/parsers/lalr_parser.py
  4. +24
    -0
      tests/test_parser.py

+ 0
- 8
lark/load_grammar.py View File

@@ -91,12 +91,8 @@ TERMINALS = {
'_IGNORE': r'%ignore',
'_DECLARE': r'%declare',
'_IMPORT': r'%import',
<<<<<<< HEAD
'NUMBER': r'[+-]?\d+',
=======
'NUMBER': r'\d+',
'_END': r'\$',
>>>>>>> end_symbol
}

RULES = {
@@ -137,12 +133,8 @@ RULES = {
'?name': ['RULE', 'TERMINAL'],

'maybe': ['_LBRA expansions _RBRA'],
<<<<<<< HEAD
'range': ['STRING _DOTDOT STRING'],
=======
'range': ['STRING _DOT _DOT STRING'],
'end': ['_END'],
>>>>>>> end_symbol

'term': ['TERMINAL _COLON expansions _NL',
'TERMINAL _DOT NUMBER _COLON expansions _NL'],


+ 1
- 1
lark/parsers/lalr_analysis.py View File

@@ -178,7 +178,7 @@ class LALR_Analyzer(GrammarAnalyzer):
assert(len(root.kernel) == 1)
for rp in root.kernel:
assert(rp.index == 0)
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
self.directly_reads[(root, rp.next)] = set([ Terminal(END) ])

for state in self.lr0_states:
seen = set()


+ 8
- 4
lark/parsers/lalr_parser.py View File

@@ -97,9 +97,13 @@ class _Parser:
token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
while True:
_action, arg = get_action(token)
assert(_action is Reduce)
reduce(arg)
if state_stack[-1] == end_state:
return value_stack[-1]
if _action is Shift:
state_stack.append(arg)
value_stack.append(token)
else:
assert(_action is Reduce)
reduce(arg)
if state_stack[-1] == end_state:
return value_stack[-1]

###}

+ 24
- 0
tests/test_parser.py View File

@@ -1660,6 +1660,30 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
self.assertRaises(UnexpectedInput, parser.parse, 'ab')

@unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
def test_end_symbol2(self):
# Exercises the `$` end symbol written inline as one branch of an
# alternation: rule `a` is "a" followed by either "x" or `$`.
# Skipped unless PARSER is 'lalr' (see the skip message above).
grammar = """
start: (a|b)+
a: "a" ("x"|$)
b: "b"
"""
parser = _Lark(grammar)

# 'axa' is expected to produce a `start` tree with two childless `a` subtrees.
self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [])]))
# 'ab' must be rejected with UnexpectedInput -- presumably because "a" is
# followed by neither "x" nor the end of input.
self.assertRaises(UnexpectedInput, parser.parse, 'ab')

@unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
def test_end_symbol3(self):
# Exercises the `$` end symbol when it is the whole body of its own rule
# (`e: $`), which rule `a` then references as an alternative to "x".
# Skipped unless PARSER is 'lalr' (see the skip message above).
grammar = """
start: (a|b)+
a: "a" (e|"x")
b: "b"
e: $
"""
parser = _Lark(grammar)

# 'axa' is expected to produce two `a` subtrees: the first childless, the
# second holding a childless `e` subtree.
self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [Tree('e', [])])]))
# 'ab' must be rejected with UnexpectedInput -- presumably because "a" is
# followed by neither "x" nor the end of input.
self.assertRaises(UnexpectedInput, parser.parse, 'ab')

@unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
def test_serialize(self):


Loading…
Cancel
Save