@@ -1,6 +1,7 @@ | |||
from .utils import Serialize | |||
###{standalone | |||
END = '__$END$__' | |||
class Symbol(Serialize): | |||
__slots__ = ('name',) | |||
@@ -14,7 +14,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE | |||
from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import ParsingFrontend | |||
from .common import LexerConf, ParserConf | |||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END | |||
from .utils import classify, suppress, dedup_list, Str | |||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError | |||
@@ -99,6 +99,7 @@ TERMINALS = { | |||
'_EXTEND': r'%extend', | |||
'_IMPORT': r'%import', | |||
'NUMBER': r'[+-]?\d+', | |||
'_END': r'\$', | |||
} | |||
RULES = { | |||
@@ -135,6 +136,7 @@ RULES = { | |||
'nonterminal', | |||
'literal', | |||
'range', | |||
'end', | |||
'template_usage'], | |||
'terminal': ['TERMINAL'], | |||
@@ -144,6 +146,7 @@ RULES = { | |||
'maybe': ['_LBRA expansions _RBRA'], | |||
'range': ['STRING _DOTDOT STRING'], | |||
'end': ['_END'], | |||
'template_usage': ['RULE _LBRACE _template_args _RBRACE'], | |||
'_template_args': ['value', | |||
@@ -791,6 +794,9 @@ class PrepareGrammar(Transformer_InPlace): | |||
def nonterminal(self, name): | |||
return name | |||
def end(self):
    """Replace an ``end`` node with a ``TERMINAL`` token whose value is ``END``."""
    end_marker = Token('TERMINAL', END)
    return end_marker
def _find_used_symbols(tree): | |||
assert tree.data == 'expansions' | |||
@@ -938,6 +944,8 @@ class GrammarBuilder: | |||
self._definitions = {} | |||
self._ignore_names = [] | |||
self._definitions[END] = ((), Tree('expansions', []), self._check_options(END, None)) | |||
def _is_term(self, name): | |||
# Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME` | |||
# Only the last part is the actual name, and the rest might contain mixed case | |||
@@ -2,7 +2,7 @@ from collections import Counter, defaultdict | |||
from ..utils import bfs, fzset, classify | |||
from ..exceptions import GrammarError | |||
from ..grammar import Rule, Terminal, NonTerminal | |||
from ..grammar import Rule, Terminal, NonTerminal, END | |||
class RulePtr(object): | |||
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object): | |||
def __init__(self, parser_conf, debug=False): | |||
self.debug = debug | |||
root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) | |||
root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)]) | |||
for start in parser_conf.start} | |||
rules = parser_conf.rules + list(root_rules.values()) | |||
@@ -12,7 +12,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger | |||
from ..exceptions import GrammarError | |||
from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet | |||
from ..grammar import Rule | |||
from ..grammar import Rule, END | |||
###{standalone | |||
@@ -177,7 +177,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||
assert(len(root.kernel) == 1) | |||
for rp in root.kernel: | |||
assert(rp.index == 0) | |||
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) | |||
self.directly_reads[(root, rp.next)] = set([ Terminal(END) ]) | |||
for state in self.lr0_states: | |||
seen = set() | |||
@@ -4,6 +4,7 @@ from copy import copy | |||
from .. import Token | |||
from ..exceptions import UnexpectedToken | |||
from ..grammar import END | |||
class InteractiveParser(object): | |||
@@ -21,18 +22,18 @@ class InteractiveParser(object): | |||
Note that ``token`` has to be an instance of ``Token``. | |||
""" | |||
return self.parser_state.feed_token(token, token.type == '$END') | |||
return self.parser_state.feed_token(token, token.type == END) | |||
def exhaust_lexer(self):
    """Try to feed the rest of the lexer state into the interactive parser.

    Note that this modifies the instance in place and does not feed an END Token"""
    # Every token the lexer still produces is pushed into the parser state;
    # the end-of-input token is deliberately left for the caller to feed.
    for token in self.lexer_state.lex(self.parser_state):
        self.parser_state.feed_token(token)
def feed_eof(self, last_token=None):
    """Feed an END Token. Borrows from 'last_token' if given.

    Returns whatever ``feed_token`` returns for the END token.
    """
    # Build the token from the shared END constant only; the former
    # hard-coded '$END' literal no longer matches the terminal name.
    if last_token is not None:
        eof = Token.new_borrow_pos(END, '', last_token)
    else:
        eof = Token(END, '', 0, 1, 1)
    return self.feed_token(eof)
@@ -116,7 +117,7 @@ class ImmutableInteractiveParser(InteractiveParser): | |||
def exhaust_lexer(self):
    """Try to feed the rest of the lexer state into the parser.

    Note that this returns a new ImmutableInteractiveParser and does not feed an END Token"""
    # Work on a mutable copy so this instance is left untouched.
    cursor = self.as_mutable()
    cursor.exhaust_lexer()
    return cursor.as_immutable()
@@ -10,6 +10,7 @@ from ..utils import Serialize | |||
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | |||
from .lalr_interactive_parser import InteractiveParser | |||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | |||
from ..grammar import END | |||
###{standalone | |||
@@ -60,7 +61,7 @@ class LALR_Parser(Serialize): | |||
return e.interactive_parser.resume_parse() | |||
except UnexpectedToken as e2: | |||
if (isinstance(e, UnexpectedToken) | |||
and e.token.type == e2.token.type == '$END' | |||
and e.token.type == e2.token.type == END | |||
and e.interactive_parser == e2.interactive_parser): | |||
# Prevent infinite loop | |||
raise e2 | |||
@@ -132,7 +133,7 @@ class ParserState(object): | |||
if action is Shift: | |||
# shift once and return | |||
assert not is_end | |||
# assert not is_end | |||
state_stack.append(arg) | |||
value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) | |||
return | |||
@@ -178,8 +179,11 @@ class _Parser(object): | |||
for token in state.lexer.lex(state): | |||
state.feed_token(token) | |||
token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) | |||
return state.feed_token(token, True) | |||
token = Token.new_borrow_pos(END, '', token) if token else Token(END, '', 0, 1, 1) | |||
while True: | |||
x = state.feed_token(token, True) | |||
if x is not None: | |||
return x | |||
except UnexpectedInput as e: | |||
try: | |||
e.interactive_parser = InteractiveParser(self, state, state.lexer) | |||
@@ -2467,6 +2467,43 @@ def _make_parser_test(LEXER, PARSER): | |||
s = "[0 1, 2,@, 3,,, 4, 5 6 ]$" | |||
tree = g.parse(s, on_error=ignore_errors) | |||
@unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
def test_end_symbol(self):
    """``$`` right after "a" accepts 'a' alone and rejects a trailing 'b'."""
    grammar = """
start: a b?
a: "a" $
b: "b"
"""
    parser = _Lark(grammar)

    expected = Tree('start', [Tree('a', [])])
    self.assertEqual(parser.parse('a'), expected)

    # "b" can never follow "a", because "a" must be the last thing in the input.
    with self.assertRaises(UnexpectedInput):
        parser.parse('ab')
@unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
def test_end_symbol2(self):
    """``$`` used as one alternative inside a group: "a" is followed by "x" or end of input."""
    grammar = """
start: (a|b)+
a: "a" ("x"|$)
b: "b"
"""
    parser = _Lark(grammar)

    expected = Tree('start', [Tree('a', []), Tree('a', [])])
    self.assertEqual(parser.parse('axa'), expected)

    # "a" directly followed by "b" matches neither alternative.
    with self.assertRaises(UnexpectedInput):
        parser.parse('ab')
@unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
def test_end_symbol3(self):
    """``$`` wrapped in its own rule ``e`` still works and shows up as an ``e`` subtree."""
    grammar = """
start: (a|b)+
a: "a" (e|"x")
b: "b"
e: $
"""
    parser = _Lark(grammar)

    first_a = Tree('a', [])
    last_a = Tree('a', [Tree('e', [])])
    expected = Tree('start', [first_a, last_a])
    self.assertEqual(parser.parse('axa'), expected)

    # "a" directly followed by "b" matches neither ``e`` nor "x".
    with self.assertRaises(UnexpectedInput):
        parser.parse('ab')
_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | |||
_TestParser.__name__ = _NAME | |||