@@ -1,6 +1,7 @@
 from .utils import Serialize
 
 ###{standalone
+END = '_END$'
 
 class Symbol(Serialize):
     __slots__ = ('name',)
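
Note on the sentinel's spelling: the trailing `$` cannot occur in a grammar-defined terminal name (load_grammar's `TERMINAL` pattern is `_?[A-Z][_A-Z0-9]*`), so `END` can never collide with a user terminal. A minimal sketch of that guarantee, assuming the patched `lark.grammar` exposes `END` as above:

```python
import re
from lark.grammar import END  # only exists with this patch applied

# Names a grammar may define must match load_grammar's TERMINAL pattern,
# which cannot produce a '$' -- so the sentinel lives outside user space.
TERMINAL_NAME = re.compile(r'_?[A-Z][_A-Z0-9]*\Z')
assert END == '_END$'
assert TERMINAL_NAME.match(END) is None
```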
@@ -309,7 +309,7 @@ class TraditionalLexer(Lexer):
             if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
 
-        assert set(ignore) <= {t.name for t in terminals}
+        assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals)
 
         # Init
         self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
@@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
-from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END
 from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 
@@ -94,6 +94,7 @@ TERMINALS = {
     '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
     'NUMBER': r'[+-]?\d+',
+    '_END': r'\$',
 }
 
 RULES = {
@@ -130,7 +131,8 @@ RULES = {
               'nonterminal',
               'literal',
               'range',
-              'template_usage'],
+              'template_usage',
+              'end'],
 
     'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],
@@ -139,6 +141,7 @@ RULES = {
     'maybe': ['_LBRA expansions _RBRA'],
     'range': ['STRING _DOTDOT STRING'],
+    'end': ['_END'],
 
     'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
     '_template_args': ['value',
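
With the `_END` terminal and the `end` rule wired into the meta-grammar, a literal `$` becomes legal anywhere an atom is: alongside terminals, literals, ranges, and template usages. A minimal usage sketch (LALR only, per the tests at the bottom of this diff):

```python
from lark import Lark, UnexpectedInput

# `$` pins rule `a` to the end of the input.
parser = Lark(r"""
    start: a b?
    a: "a" $
    b: "b"
""", parser='lalr')

parser.parse('a')        # ok: 'a' is the last token of the stream
try:
    parser.parse('ab')   # 'a' is followed by 'b', so `$` cannot match
except UnexpectedInput:
    pass
```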
@@ -299,6 +302,9 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]
 
+    def end(self):
+        return Token('TERMINAL', END)
+
 class PrepareAnonTerminals(Transformer_InPlace):
     "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
@@ -807,6 +813,7 @@ class GrammarLoader:
         term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
         term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
+        term_defs.append((END, (None, 0)))
         rule_defs = [options_from_rule(*x) for x in rule_defs]
 
         # Execute statements
@@ -899,7 +906,7 @@ class GrammarLoader:
                 raise GrammarError("Terminal '%s' defined more than once" % name)
             terminal_names.add(name)
 
-        if set(ignore_names) > terminal_names:
+        if set(ignore_names) - terminal_names:
             raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
 
         resolve_term_references(term_defs)
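
The one-character fix above deserves a note: `>` tests for a proper superset, which is almost never true once the grammar defines any terminal, so misspelled `%ignore` names used to pass silently. Set difference is truthy whenever any ignored name is undefined:

```python
ignore_names   = {'WS', 'COMMENT'}    # 'COMMENT' was never defined
terminal_names = {'WS', 'NUMBER'}

print(ignore_names > terminal_names)  # False       -> old check stayed silent
print(ignore_names - terminal_names)  # {'COMMENT'} -> truthy, error is raised
```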
@@ -2,7 +2,7 @@ from collections import Counter, defaultdict
 
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
-from ..grammar import Rule, Terminal, NonTerminal
+from ..grammar import Rule, Terminal, NonTerminal, END
 
 
 class RulePtr(object):
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug
 
-        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)])
                       for start in parser_conf.start}
 
         rules = parser_conf.rules + list(root_rules.values())
@@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
-from ..grammar import Rule
+from ..grammar import Rule, END
 
 ###{standalone
@@ -178,7 +178,7 @@ class LALR_Analyzer(GrammarAnalyzer):
             assert(len(root.kernel) == 1)
             for rp in root.kernel:
                 assert(rp.index == 0)
-                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
+                self.directly_reads[(root, rp.next)] = set([ Terminal(END) ])
 
         for state in self.lr0_states:
             seen = set()
@@ -5,6 +5,7 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..utils import Enumerator, Serialize
+from ..grammar import END
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
@@ -105,12 +106,16 @@ class _Parser:
                 raise
 
-        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
+        token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
         while True:
             _action, arg = get_action(token)
-            assert(_action is Reduce)
-            reduce(arg)
-            if state_stack[-1] == end_state:
-                return value_stack[-1]
+            if _action is Shift:
+                state_stack.append(arg)
+                value_stack.append(token)
+            else:
+                assert(_action is Reduce)
+                reduce(arg)
+                if state_stack[-1] == end_state:
+                    return value_stack[-1]
 
 ###}
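
The loop rewrite is the behavioral core of the parser change: the injected end-of-stream token used to drive reductions only, but now that `END` is a terminal the grammar itself can consume, it must be shiftable too. A hypothetical trace for `a: "a" $` on input `'a'` (token names illustrative):

```python
# feed Token('A', 'a')          -> Shift
# feed Token('_END$', None)     -> Shift   (the grammar's `$` consumes it),
#                                  then Reduce a, Reduce start
# state_stack[-1] == end_state  -> return value_stack[-1]
#
# Under the old loop, that Shift of the end token would have tripped
# `assert(_action is Reduce)`.
```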
@@ -1737,6 +1737,42 @@ def _make_parser_test(LEXER, PARSER):
             """
             parser = _Lark(grammar)
 
+        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
+        def test_end_symbol(self):
+            grammar = """
+            start: a b?
+            a: "a" $
+            b: "b"
+            """
+            parser = _Lark(grammar)
+
+            self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
+            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
+
+        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
+        def test_end_symbol2(self):
+            grammar = """
+            start: (a|b)+
+            a: "a" ("x"|$)
+            b: "b"
+            """
+            parser = _Lark(grammar)
+
+            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []), Tree('a', [])]))
+            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
+
+        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
+        def test_end_symbol3(self):
+            grammar = """
+            start: (a|b)+
+            a: "a" (e|"x")
+            b: "b"
+            e: $
+            """
+            parser = _Lark(grammar)
+
+            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []), Tree('a', [Tree('e', [])])]))
+            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
+
         @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):