--- a/lark/__init__.py
+++ b/lark/__init__.py
@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "0.8.7"
+__version__ = "0.8.6"
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -1,7 +1,6 @@
 from .utils import Serialize
 
 ###{standalone
-END = '_END$'
 
 class Symbol(Serialize):
     __slots__ = ('name',)
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -309,7 +309,7 @@ class TraditionalLexer(Lexer):
             if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
 
-        assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals)
+        assert set(ignore) <= {t.name for t in terminals}
 
         # Init
         self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
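A note on this hunk: `set(ignore) <= {...}` is a subset test, so the assertion holds exactly when every ignored name is a defined terminal; the revert only drops the diagnostic tuple from the assert. The zero-width check above it exists because a terminal that can match the empty string would stall a longest-match lexer. A minimal illustration of that failure mode (plain `re`, not lark code):

    import re

    # A pattern whose minimum match width is 0: it happily matches ''.
    zero_width = re.compile(r'a*')

    text, pos = 'bbb', 0
    m = zero_width.match(text, pos)
    # The match succeeds with length 0, so a lexer that advances by the
    # match length (pos += len(m.group())) would never move forward.
    print(repr(m.group()))  # '' -> an infinite tokenizer loop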
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
-from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
@@ -94,7 +94,6 @@ TERMINALS = {
     '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
     'NUMBER': r'[+-]?\d+',
-    '_END': r'\$',
 }
 
 RULES = {
@@ -131,8 +130,7 @@ RULES = {
              'nonterminal',
              'literal',
              'range',
-             'template_usage',
-             'end'],
+             'template_usage'],
 
     'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],
@@ -141,7 +139,6 @@ RULES = {
 
     'maybe': ['_LBRA expansions _RBRA'],
     'range': ['STRING _DOTDOT STRING'],
-    'end': ['_END'],
 
     'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
     '_template_args': ['value',
@@ -302,9 +299,6 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]
 
-    def end(self):
-        return Token('TERMINAL', END)
-
 
 class PrepareAnonTerminals(Transformer_InPlace):
     "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
@@ -813,7 +807,6 @@ class GrammarLoader:
         term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
         term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
-        term_defs.append((END, (None, 0)))
 
         rule_defs = [options_from_rule(*x) for x in rule_defs]
 
         # Execute statements
@@ -906,7 +899,7 @@ class GrammarLoader:
                 raise GrammarError("Terminal '%s' defined more than once" % name)
             terminal_names.add(name)
 
-        if set(ignore_names) - terminal_names:
+        if set(ignore_names) > terminal_names:
             raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
 
         resolve_term_references(term_defs)
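Worth flagging in the last hunk above: the reinstated condition `set(ignore_names) > terminal_names` is a proper-superset test, which is not equivalent to the truthiness check on `set(ignore_names) - terminal_names` that it replaces; an undefined ignored name slips through whenever the ignore set is not a superset of all defined terminals. A quick demonstration (plain Python set semantics, not lark code):

    terminal_names = {'NUMBER', 'STRING'}
    ignore_names = ['WS']          # marked %ignore but never defined

    # Set difference catches the undefined name:
    print(set(ignore_names) - terminal_names)  # {'WS'} -> truthy, error raised

    # Proper superset does not: {'WS'} is not a superset of {'NUMBER', 'STRING'}.
    print(set(ignore_names) > terminal_names)  # False -> error silently skipped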
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -2,7 +2,7 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
-from ..grammar import Rule, Terminal, NonTerminal, END
+from ..grammar import Rule, Terminal, NonTerminal
 
 
 class RulePtr(object):
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug
 
-        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)])
+        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                       for start in parser_conf.start}
 
         rules = parser_conf.rules + list(root_rules.values())
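For context, the hunk above builds the standard LR augmentation: every start symbol X gets a synthetic rule `$root_X : X $END`, so the parser only accepts once the whole input has been consumed. A hedged sketch of the rule being constructed, reusing the classes the hunk already imports (the printed form is an assumption about `Rule.__str__`):

    from lark.grammar import Rule, NonTerminal, Terminal

    # Mirrors the dict comprehension above for a single start symbol 'start'.
    root = Rule(NonTerminal('$root_start'), [NonTerminal('start'), Terminal('$END')])
    print(root)  # expected: <$root_start : start $END>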
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
-from ..grammar import Rule, END
+from ..grammar import Rule
 
 ###{standalone
 
@@ -178,7 +178,7 @@ class LALR_Analyzer(GrammarAnalyzer):
             assert(len(root.kernel) == 1)
             for rp in root.kernel:
                 assert(rp.index == 0)
-                self.directly_reads[(root, rp.next)] = set([ Terminal(END) ])
+                self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
 
         for state in self.lr0_states:
             seen = set()
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -5,7 +5,6 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..utils import Enumerator, Serialize
-from ..grammar import END
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
 
@@ -106,16 +105,12 @@ class _Parser:
                 raise
 
-        token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
+        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
         while True:
             _action, arg = get_action(token)
-            if _action is Shift:
-                state_stack.append(arg)
-                value_stack.append(token)
-            else:
-                assert(_action is Reduce)
-                reduce(arg)
-                if state_stack[-1] == end_state:
-                    return value_stack[-1]
+            assert(_action is Reduce)
+            reduce(arg)
+            if state_stack[-1] == end_state:
+                return value_stack[-1]
 
 ###}
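With the end symbol gone, the rewritten loop never shifts `$END`: once real input is exhausted, the synthetic `$END` token only ever selects Reduce actions, and parsing finishes as soon as the end state sits on top of the state stack. A self-contained toy sketch of that driver pattern (the grammar, table, and state numbers are invented for illustration and are not lark's API):

    # Toy tables for the augmented grammar   $root: S $END ;   S: "a"
    # State 0 is the start state; the goto on S reaches state 1, the end state.
    SHIFT, REDUCE = 'shift', 'reduce'
    action = {
        (0, 'a'):    (SHIFT, 2),
        (2, '$END'): (REDUCE, ('S', 1)),   # reduce S -> "a" (pops one state)
    }
    goto = {(0, 'S'): 1}
    end_state = 1

    def parse(tokens):
        state_stack, value_stack = [0], []
        for tok in tokens + ['$END']:
            while True:
                act, arg = action[(state_stack[-1], tok)]
                if act == SHIFT:
                    state_stack.append(arg)
                    value_stack.append(tok)
                    break                    # token consumed; fetch the next one
                rule, size = arg             # REDUCE: pop `size` states, run goto
                del state_stack[-size:]
                value_stack[-size:] = [(rule, value_stack[-size:])]
                state_stack.append(goto[(state_stack[-1], rule)])
                if tok == '$END' and state_stack[-1] == end_state:
                    return value_stack[-1]   # mirrors the reduce-until-end-state loop

    print(parse(['a']))  # ('S', ['a'])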
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ __version__ ,= re.findall('__version__ = "(.*)"', open('lark/__init__.py').read())
 setup(
     name = "lark-parser",
     version = __version__,
-    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark.__pyinstaller', 'lark-stubs'],
+    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark-stubs'],
 
     requires = [],
     install_requires = [],
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1737,42 +1737,6 @@ def _make_parser_test(LEXER, PARSER):
             """
             parser = _Lark(grammar)
 
-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol(self):
-            grammar = """
-            start: a b?
-            a: "a" $
-            b: "b"
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol2(self):
-            grammar = """
-            start: (a|b)+
-            a: "a" ("x"|$)
-            b: "b"
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol3(self):
-            grammar = """
-            start: (a|b)+
-            a: "a" (e|"x")
-            b: "b"
-            e: $
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [Tree('e', [])])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
 
         @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):