Reverted changes regarding EOF

Erez Sh · 4 years ago · commit 3bee21051e
9 changed files with 15 additions and 64 deletions
  1. lark/__init__.py                  +1 / -1
  2. lark/grammar.py                   +0 / -1
  3. lark/lexer.py                     +1 / -1
  4. lark/load_grammar.py              +3 / -10
  5. lark/parsers/grammar_analysis.py  +2 / -2
  6. lark/parsers/lalr_analysis.py     +2 / -2
  7. lark/parsers/lalr_parser.py       +5 / -10
  8. setup.py                          +1 / -1
  9. tests/test_parser.py              +0 / -36

lark/__init__.py  (+1 / -1)

@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark

-__version__ = "0.8.7"
+__version__ = "0.8.6"

lark/grammar.py  (+0 / -1)

@@ -1,7 +1,6 @@
 from .utils import Serialize

 ###{standalone
-END = '_END$'

 class Symbol(Serialize):
     __slots__ = ('name',)


lark/lexer.py  (+1 / -1)

@@ -309,7 +309,7 @@ class TraditionalLexer(Lexer):
             if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))

-        assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals)
+        assert set(ignore) <= {t.name for t in terminals}

         # Init
         self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]


lark/load_grammar.py  (+3 / -10)

@@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
-from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken

@@ -94,7 +94,6 @@ TERMINALS = {
     '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
     'NUMBER': r'[+-]?\d+',
-    '_END': r'\$',
 }

 RULES = {

@@ -131,8 +130,7 @@ RULES = {
              'nonterminal',
              'literal',
              'range',
-             'template_usage',
-             'end'],
+             'template_usage'],

     'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],

@@ -141,7 +139,6 @@

     'maybe': ['_LBRA expansions _RBRA'],
     'range': ['STRING _DOTDOT STRING'],
-    'end': ['_END'],

     'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
     '_template_args': ['value',

@@ -302,9 +299,6 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]

-    def end(self):
-        return Token('TERMINAL', END)
-
 class PrepareAnonTerminals(Transformer_InPlace):
     "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"

@@ -813,7 +807,6 @@

         term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
         term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
-        term_defs.append((END, (None, 0)))
         rule_defs = [options_from_rule(*x) for x in rule_defs]

         # Execute statements

@@ -906,7 +899,7 @@
                 raise GrammarError("Terminal '%s' defined more than once" % name)
             terminal_names.add(name)

-        if set(ignore_names) - terminal_names:
+        if set(ignore_names) > terminal_names:
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))

         resolve_term_references(term_defs)
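
A note while reading the last hunk: the restored superset check is not equivalent to the set-difference check it replaces. A quick illustration in plain Python (the names below are made up for the example, not lark internals):

    ignore_names = {'WS', 'COMMENT'}   # hypothetical %ignore declarations
    terminal_names = {'WS', 'NUMBER'}  # hypothetical defined terminals

    # Check restored by this commit: true only if ignore_names is a
    # proper superset of terminal_names.
    print(set(ignore_names) > terminal_names)    # False -> no error raised

    # Check being reverted: truthy whenever any ignored name is undefined.
    print(set(ignore_names) - terminal_names)    # {'COMMENT'} -> error raised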


lark/parsers/grammar_analysis.py  (+2 / -2)

@@ -2,7 +2,7 @@ from collections import Counter, defaultdict

 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
-from ..grammar import Rule, Terminal, NonTerminal, END
+from ..grammar import Rule, Terminal, NonTerminal


 class RulePtr(object):

@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug

-        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)])
+        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                       for start in parser_conf.start}

         rules = parser_conf.rules + list(root_rules.values())
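
For context on the `$root_` line above: this is the usual LR grammar augmentation, where every start symbol S gets a synthetic rule `$root_S : S $END`, so the parser accepts only once the entire input has been consumed. A rough sketch with plain tuples (a hypothetical helper, not lark's Rule/NonTerminal/Terminal classes):

    def augment(start_symbols):
        # For each start symbol S, build a synthetic root rule: $root_S -> S $END
        return {s: ('$root_' + s, [s, '$END']) for s in start_symbols}

    print(augment(['start']))
    # {'start': ('$root_start', ['start', '$END'])}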


lark/parsers/lalr_analysis.py  (+2 / -2)

@@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError

 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
-from ..grammar import Rule, END
+from ..grammar import Rule

 ###{standalone

@@ -178,7 +178,7 @@ class LALR_Analyzer(GrammarAnalyzer):
         assert(len(root.kernel) == 1)
         for rp in root.kernel:
             assert(rp.index == 0)
-            self.directly_reads[(root, rp.next)] = set([ Terminal(END) ])
+            self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

         for state in self.lr0_states:
             seen = set()


lark/parsers/lalr_parser.py  (+5 / -10)

@@ -5,7 +5,6 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..utils import Enumerator, Serialize
-from ..grammar import END

 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

@@ -106,16 +105,12 @@

                 raise

-        token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
+        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
         while True:
             _action, arg = get_action(token)
-            if _action is Shift:
-                state_stack.append(arg)
-                value_stack.append(token)
-            else:
-                assert(_action is Reduce)
-                reduce(arg)
-                if state_stack[-1] == end_state:
-                    return value_stack[-1]
+            assert(_action is Reduce)
+            reduce(arg)
+            if state_stack[-1] == end_state:
+                return value_stack[-1]

 ###}
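
The second hunk restores the pre-EOF control flow: once the lexer is exhausted, the parser synthesizes a `$END` token and from then on can only reduce until it reaches the accepting state; with the explicit end symbol gone, a Shift on `$END` is no longer possible. A minimal standalone sketch of that end-of-input loop (hypothetical helpers, not lark's actual classes):

    def finish_parse(get_action, do_reduce, state_stack, value_stack, end_state):
        token = ('$END', '')                   # synthetic end-of-input token
        while True:
            action, arg = get_action(token)    # consult the LALR parse table
            assert action == 'reduce'          # at EOF, only reductions remain
            do_reduce(arg)                     # apply rule: pop RHS, push result
            if state_stack[-1] == end_state:   # reached the accepting state
                return value_stack[-1]         # finished parse tree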

setup.py  (+1 / -1)

@@ -6,7 +6,7 @@ __version__ ,= re.findall('__version__ = "(.*)"', open('lark/__init__.py').read(
 setup(
     name = "lark-parser",
     version = __version__,
-    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark.__pyinstaller', 'lark-stubs'],
+    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark-stubs'],

     requires = [],
     install_requires = [],


tests/test_parser.py  (+0 / -36)

@@ -1737,42 +1737,6 @@ def _make_parser_test(LEXER, PARSER):
         """
         parser = _Lark(grammar)

-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol(self):
-            grammar = """
-            start: a b?
-            a: "a" $
-            b: "b"
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol2(self):
-            grammar = """
-            start: (a|b)+
-            a: "a" ("x"|$)
-            b: "b"
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-        def test_end_symbol3(self):
-            grammar = """
-            start: (a|b)+
-            a: "a" (e|"x")
-            b: "b"
-            e: $
-            """
-            parser = _Lark(grammar)
-
-            self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [Tree('e', [])])]))
-            self.assertRaises(UnexpectedInput, parser.parse, 'ab')

         @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):

