| @@ -1,6 +1,7 @@ | |||||
| from .utils import Serialize | from .utils import Serialize | ||||
| ###{standalone | ###{standalone | ||||
| END = '__$END$__' | |||||
| class Symbol(Serialize): | class Symbol(Serialize): | ||||
| __slots__ = ('name',) | __slots__ = ('name',) | ||||
| @@ -14,7 +14,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE | |||||
| from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
| from .parser_frontends import ParsingFrontend | from .parser_frontends import ParsingFrontend | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END | |||||
| from .utils import classify, suppress, dedup_list, Str | from .utils import classify, suppress, dedup_list, Str | ||||
| from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError | from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError | ||||
| @@ -99,6 +99,7 @@ TERMINALS = { | |||||
| '_EXTEND': r'%extend', | '_EXTEND': r'%extend', | ||||
| '_IMPORT': r'%import', | '_IMPORT': r'%import', | ||||
| 'NUMBER': r'[+-]?\d+', | 'NUMBER': r'[+-]?\d+', | ||||
| '_END': r'\$', | |||||
| } | } | ||||
| RULES = { | RULES = { | ||||
| @@ -135,6 +136,7 @@ RULES = { | |||||
| 'nonterminal', | 'nonterminal', | ||||
| 'literal', | 'literal', | ||||
| 'range', | 'range', | ||||
| 'end', | |||||
| 'template_usage'], | 'template_usage'], | ||||
| 'terminal': ['TERMINAL'], | 'terminal': ['TERMINAL'], | ||||
| @@ -144,6 +146,7 @@ RULES = { | |||||
| 'maybe': ['_LBRA expansions _RBRA'], | 'maybe': ['_LBRA expansions _RBRA'], | ||||
| 'range': ['STRING _DOTDOT STRING'], | 'range': ['STRING _DOTDOT STRING'], | ||||
| 'end': ['_END'], | |||||
| 'template_usage': ['RULE _LBRACE _template_args _RBRACE'], | 'template_usage': ['RULE _LBRACE _template_args _RBRACE'], | ||||
| '_template_args': ['value', | '_template_args': ['value', | ||||
| @@ -791,6 +794,9 @@ class PrepareGrammar(Transformer_InPlace): | |||||
| def nonterminal(self, name): | def nonterminal(self, name): | ||||
| return name | return name | ||||
| def end(self): | |||||
| return Token('TERMINAL', END) | |||||
| def _find_used_symbols(tree): | def _find_used_symbols(tree): | ||||
| assert tree.data == 'expansions' | assert tree.data == 'expansions' | ||||
| @@ -938,6 +944,8 @@ class GrammarBuilder: | |||||
| self._definitions = {} | self._definitions = {} | ||||
| self._ignore_names = [] | self._ignore_names = [] | ||||
| self._definitions[END] = ((), Tree('expansions', []), self._check_options(END, None)) | |||||
| def _is_term(self, name): | def _is_term(self, name): | ||||
| # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME` | # Imported terminals are of the form `Path__to__Grammar__file__TERMINAL_NAME` | ||||
| # Only the last part is the actual name, and the rest might contain mixed case | # Only the last part is the actual name, and the rest might contain mixed case | ||||
| @@ -2,7 +2,7 @@ from collections import Counter, defaultdict | |||||
| from ..utils import bfs, fzset, classify | from ..utils import bfs, fzset, classify | ||||
| from ..exceptions import GrammarError | from ..exceptions import GrammarError | ||||
| from ..grammar import Rule, Terminal, NonTerminal | |||||
| from ..grammar import Rule, Terminal, NonTerminal, END | |||||
| class RulePtr(object): | class RulePtr(object): | ||||
| @@ -125,7 +125,7 @@ class GrammarAnalyzer(object): | |||||
| def __init__(self, parser_conf, debug=False): | def __init__(self, parser_conf, debug=False): | ||||
| self.debug = debug | self.debug = debug | ||||
| root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')]) | |||||
| root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)]) | |||||
| for start in parser_conf.start} | for start in parser_conf.start} | ||||
| rules = parser_conf.rules + list(root_rules.values()) | rules = parser_conf.rules + list(root_rules.values()) | ||||
| @@ -12,7 +12,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger | |||||
| from ..exceptions import GrammarError | from ..exceptions import GrammarError | ||||
| from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet | from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet | ||||
| from ..grammar import Rule | |||||
| from ..grammar import Rule, END | |||||
| ###{standalone | ###{standalone | ||||
| @@ -177,7 +177,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| assert(len(root.kernel) == 1) | assert(len(root.kernel) == 1) | ||||
| for rp in root.kernel: | for rp in root.kernel: | ||||
| assert(rp.index == 0) | assert(rp.index == 0) | ||||
| self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) | |||||
| self.directly_reads[(root, rp.next)] = set([ Terminal(END) ]) | |||||
| for state in self.lr0_states: | for state in self.lr0_states: | ||||
| seen = set() | seen = set() | ||||
| @@ -4,6 +4,7 @@ from copy import copy | |||||
| from .. import Token | from .. import Token | ||||
| from ..exceptions import UnexpectedToken | from ..exceptions import UnexpectedToken | ||||
| from ..grammar import END | |||||
| class InteractiveParser(object): | class InteractiveParser(object): | ||||
| @@ -21,18 +22,18 @@ class InteractiveParser(object): | |||||
| Note that ``token`` has to be an instance of ``Token``. | Note that ``token`` has to be an instance of ``Token``. | ||||
| """ | """ | ||||
| return self.parser_state.feed_token(token, token.type == '$END') | |||||
| return self.parser_state.feed_token(token, token.type == END) | |||||
| def exhaust_lexer(self): | def exhaust_lexer(self): | ||||
| """Try to feed the rest of the lexer state into the interactive parser. | """Try to feed the rest of the lexer state into the interactive parser. | ||||
| Note that this modifies the instance in place and does not feed an '$END' Token""" | |||||
| Note that this modifies the instance in place and does not feed an END Token""" | |||||
| for token in self.lexer_state.lex(self.parser_state): | for token in self.lexer_state.lex(self.parser_state): | ||||
| self.parser_state.feed_token(token) | self.parser_state.feed_token(token) | ||||
| def feed_eof(self, last_token=None): | def feed_eof(self, last_token=None): | ||||
| """Feed a '$END' Token. Borrows from 'last_token' if given.""" | |||||
| eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) | |||||
| """Feed an END Token. Borrows from 'last_token' if given.""" | |||||
| eof = Token.new_borrow_pos(END, '', last_token) if last_token is not None else Token(END, '', 0, 1, 1) | |||||
| return self.feed_token(eof) | return self.feed_token(eof) | ||||
| @@ -116,7 +117,7 @@ class ImmutableInteractiveParser(InteractiveParser): | |||||
| def exhaust_lexer(self): | def exhaust_lexer(self): | ||||
| """Try to feed the rest of the lexer state into the parser. | """Try to feed the rest of the lexer state into the parser. | ||||
| Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" | |||||
| Note that this returns a new ImmutableInteractiveParser and does not feed an END Token""" | |||||
| cursor = self.as_mutable() | cursor = self.as_mutable() | ||||
| cursor.exhaust_lexer() | cursor.exhaust_lexer() | ||||
| return cursor.as_immutable() | return cursor.as_immutable() | ||||
| @@ -10,6 +10,7 @@ from ..utils import Serialize | |||||
| from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | ||||
| from .lalr_interactive_parser import InteractiveParser | from .lalr_interactive_parser import InteractiveParser | ||||
| from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | ||||
| from ..grammar import END | |||||
| ###{standalone | ###{standalone | ||||
| @@ -60,7 +61,7 @@ class LALR_Parser(Serialize): | |||||
| return e.interactive_parser.resume_parse() | return e.interactive_parser.resume_parse() | ||||
| except UnexpectedToken as e2: | except UnexpectedToken as e2: | ||||
| if (isinstance(e, UnexpectedToken) | if (isinstance(e, UnexpectedToken) | ||||
| and e.token.type == e2.token.type == '$END' | |||||
| and e.token.type == e2.token.type == END | |||||
| and e.interactive_parser == e2.interactive_parser): | and e.interactive_parser == e2.interactive_parser): | ||||
| # Prevent infinite loop | # Prevent infinite loop | ||||
| raise e2 | raise e2 | ||||
| @@ -132,7 +133,7 @@ class ParserState(object): | |||||
| if action is Shift: | if action is Shift: | ||||
| # shift once and return | # shift once and return | ||||
| assert not is_end | |||||
| # assert not is_end | |||||
| state_stack.append(arg) | state_stack.append(arg) | ||||
| value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) | value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) | ||||
| return | return | ||||
| @@ -178,8 +179,11 @@ class _Parser(object): | |||||
| for token in state.lexer.lex(state): | for token in state.lexer.lex(state): | ||||
| state.feed_token(token) | state.feed_token(token) | ||||
| token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) | |||||
| return state.feed_token(token, True) | |||||
| token = Token.new_borrow_pos(END, '', token) if token else Token(END, '', 0, 1, 1) | |||||
| while True: | |||||
| x = state.feed_token(token, True) | |||||
| if x is not None: | |||||
| return x | |||||
| except UnexpectedInput as e: | except UnexpectedInput as e: | ||||
| try: | try: | ||||
| e.interactive_parser = InteractiveParser(self, state, state.lexer) | e.interactive_parser = InteractiveParser(self, state, state.lexer) | ||||
| @@ -2467,6 +2467,43 @@ def _make_parser_test(LEXER, PARSER): | |||||
| s = "[0 1, 2,@, 3,,, 4, 5 6 ]$" | s = "[0 1, 2,@, 3,,, 4, 5 6 ]$" | ||||
| tree = g.parse(s, on_error=ignore_errors) | tree = g.parse(s, on_error=ignore_errors) | ||||
| @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only") | |||||
| def test_end_symbol(self): | |||||
| grammar = """ | |||||
| start: a b? | |||||
| a: "a" $ | |||||
| b: "b" | |||||
| """ | |||||
| parser = _Lark(grammar) | |||||
| self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])])) | |||||
| self.assertRaises(UnexpectedInput, parser.parse, 'ab') | |||||
| @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only") | |||||
| def test_end_symbol2(self): | |||||
| grammar = """ | |||||
| start: (a|b)+ | |||||
| a: "a" ("x"|$) | |||||
| b: "b" | |||||
| """ | |||||
| parser = _Lark(grammar) | |||||
| self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [])])) | |||||
| self.assertRaises(UnexpectedInput, parser.parse, 'ab') | |||||
| @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only") | |||||
| def test_end_symbol3(self): | |||||
| grammar = """ | |||||
| start: (a|b)+ | |||||
| a: "a" (e|"x") | |||||
| b: "b" | |||||
| e: $ | |||||
| """ | |||||
| parser = _Lark(grammar) | |||||
| self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [Tree('e', [])])])) | |||||
| self.assertRaises(UnexpectedInput, parser.parse, 'ab') | |||||
| _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | ||||
| _TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||