| @@ -1,6 +1,7 @@ | |||||
| from .utils import Serialize | from .utils import Serialize | ||||
| ###{standalone | ###{standalone | ||||
| END = '_END$' | |||||
| class Symbol(Serialize): | class Symbol(Serialize): | ||||
| is_term = NotImplemented | is_term = NotImplemented | ||||
| @@ -292,7 +292,7 @@ class TraditionalLexer(Lexer): | |||||
| if t.pattern.min_width == 0: | if t.pattern.min_width == 0: | ||||
| raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) | raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) | ||||
| assert set(ignore) <= {t.name for t in terminals} | |||||
| assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals) | |||||
| # Init | # Init | ||||
| self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] | self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] | ||||
| @@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE | |||||
| from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
| from .parser_frontends import LALR_TraditionalLexer | from .parser_frontends import LALR_TraditionalLexer | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END | |||||
| from .utils import classify, suppress, dedup_list | from .utils import classify, suppress, dedup_list | ||||
| from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | ||||
| @@ -91,6 +91,7 @@ TERMINALS = { | |||||
| '_DECLARE': r'%declare', | '_DECLARE': r'%declare', | ||||
| '_IMPORT': r'%import', | '_IMPORT': r'%import', | ||||
| 'NUMBER': r'\d+', | 'NUMBER': r'\d+', | ||||
| '_END': r'\$', | |||||
| } | } | ||||
| RULES = { | RULES = { | ||||
| @@ -122,7 +123,8 @@ RULES = { | |||||
| 'value': ['terminal', | 'value': ['terminal', | ||||
| 'nonterminal', | 'nonterminal', | ||||
| 'literal', | 'literal', | ||||
| 'range'], | |||||
| 'range', | |||||
| 'end'], | |||||
| 'terminal': ['TERMINAL'], | 'terminal': ['TERMINAL'], | ||||
| 'nonterminal': ['RULE'], | 'nonterminal': ['RULE'], | ||||
| @@ -131,6 +133,7 @@ RULES = { | |||||
| 'maybe': ['_LBRA expansions _RBRA'], | 'maybe': ['_LBRA expansions _RBRA'], | ||||
| 'range': ['STRING _DOT _DOT STRING'], | 'range': ['STRING _DOT _DOT STRING'], | ||||
| 'end': ['_END'], | |||||
| 'term': ['TERMINAL _COLON expansions _NL', | 'term': ['TERMINAL _COLON expansions _NL', | ||||
| 'TERMINAL _DOT NUMBER _COLON expansions _NL'], | 'TERMINAL _DOT NUMBER _COLON expansions _NL'], | ||||
| @@ -285,6 +288,9 @@ class CanonizeTree(Transformer_InPlace): | |||||
| tokenmods, value = args | tokenmods, value = args | ||||
| return tokenmods + [value] | return tokenmods + [value] | ||||
| def end(self): | |||||
| return Token('TERMINAL', END) | |||||
| class PrepareAnonTerminals(Transformer_InPlace): | class PrepareAnonTerminals(Transformer_InPlace): | ||||
| "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them" | "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them" | ||||
| @@ -735,6 +741,7 @@ class GrammarLoader: | |||||
| term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] | term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] | ||||
| term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] | term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] | ||||
| term_defs.append((END, (None, 0))) | |||||
| rule_defs = [options_from_rule(*x) for x in rule_defs] | rule_defs = [options_from_rule(*x) for x in rule_defs] | ||||
| # Execute statements | # Execute statements | ||||
| @@ -827,7 +834,7 @@ class GrammarLoader: | |||||
| raise GrammarError("Terminal '%s' defined more than once" % name) | raise GrammarError("Terminal '%s' defined more than once" % name) | ||||
| terminal_names.add(name) | terminal_names.add(name) | ||||
| if set(ignore_names) > terminal_names: | |||||
| if set(ignore_names) - terminal_names: | |||||
| raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names)) | raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names)) | ||||
| resolve_term_references(term_defs) | resolve_term_references(term_defs) | ||||
| @@ -2,7 +2,7 @@ from collections import Counter | |||||
| from ..utils import bfs, fzset, classify | from ..utils import bfs, fzset, classify | ||||
| from ..exceptions import GrammarError | from ..exceptions import GrammarError | ||||
| from ..grammar import Rule, Terminal, NonTerminal | |||||
| from ..grammar import Rule, Terminal, NonTerminal, END | |||||
| class RulePtr(object): | class RulePtr(object): | ||||
| @@ -109,7 +109,7 @@ class GrammarAnalyzer(object): | |||||
| def __init__(self, parser_conf, debug=False): | def __init__(self, parser_conf, debug=False): | ||||
| self.debug = debug | self.debug = debug | ||||
| root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) | |||||
| root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)]) | |||||
| for start in parser_conf.start} | for start in parser_conf.start} | ||||
| rules = parser_conf.rules + list(root_rules.values()) | rules = parser_conf.rules + list(root_rules.values()) | ||||
| @@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator | |||||
| from ..exceptions import GrammarError | from ..exceptions import GrammarError | ||||
| from .grammar_analysis import GrammarAnalyzer, Terminal | from .grammar_analysis import GrammarAnalyzer, Terminal | ||||
| from ..grammar import Rule | |||||
| from ..grammar import Rule, END | |||||
| ###{standalone | ###{standalone | ||||
| @@ -5,6 +5,7 @@ | |||||
| from ..exceptions import UnexpectedToken | from ..exceptions import UnexpectedToken | ||||
| from ..lexer import Token | from ..lexer import Token | ||||
| from ..utils import Enumerator, Serialize | from ..utils import Enumerator, Serialize | ||||
| from ..grammar import END | |||||
| from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable | from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable | ||||
| @@ -94,13 +95,14 @@ class _Parser: | |||||
| else: | else: | ||||
| reduce(arg) | reduce(arg) | ||||
| token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) | |||||
| token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1) | |||||
| while True: | while True: | ||||
| _action, arg = get_action(token) | _action, arg = get_action(token) | ||||
| if _action is Shift: | if _action is Shift: | ||||
| assert arg == end_state | |||||
| val ,= value_stack | |||||
| return val | |||||
| if arg == end_state: | |||||
| val ,= value_stack | |||||
| return val | |||||
| state_stack.append(arg) | |||||
| else: | else: | ||||
| reduce(arg) | reduce(arg) | ||||
| @@ -1505,6 +1505,18 @@ def _make_parser_test(LEXER, PARSER): | |||||
| """ | """ | ||||
| parser = _Lark(grammar) | parser = _Lark(grammar) | ||||
| @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only") | |||||
| def test_end_symbol(self): | |||||
| grammar = """ | |||||
| start: a b? | |||||
| a: "a" $ | |||||
| b: "b" | |||||
| """ | |||||
| parser = _Lark(grammar) | |||||
| self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])])) | |||||
| self.assertRaises(UnexpectedInput, parser.parse, 'ab') | |||||
| @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)") | @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)") | ||||
| def test_serialize(self): | def test_serialize(self): | ||||