
Reverted changes regarding EOF

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.8
Erez Sh committed 4 years ago
parent
commit
3bee21051e
9 changed files with 15 additions and 64 deletions:

  1. lark/__init__.py (+1, -1)
  2. lark/grammar.py (+0, -1)
  3. lark/lexer.py (+1, -1)
  4. lark/load_grammar.py (+3, -10)
  5. lark/parsers/grammar_analysis.py (+2, -2)
  6. lark/parsers/lalr_analysis.py (+2, -2)
  7. lark/parsers/lalr_parser.py (+5, -10)
  8. setup.py (+1, -1)
  9. tests/test_parser.py (+0, -36)

lark/__init__.py (+1, -1)

@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "0.8.7"
+__version__ = "0.8.6"

lark/grammar.py (+0, -1)

@@ -1,7 +1,6 @@
 from .utils import Serialize
 
 ###{standalone
-END = '_END$'
 
 class Symbol(Serialize):
     __slots__ = ('name',)


lark/lexer.py (+1, -1)

@@ -309,7 +309,7 @@ class TraditionalLexer(Lexer):
         if t.pattern.min_width == 0:
             raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
 
-        assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals)
+        assert set(ignore) <= {t.name for t in terminals}
 
         # Init
         self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]
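
Only the assertion's message argument is dropped here. For illustration (hypothetical values, not from this diff), the second operand of assert is what surfaces in the AssertionError, so removing it loses debugging context on failure:

# Hypothetical names: COMMENT is ignored but was never declared.
ignore = ['WS', 'COMMENT']
terminal_names = {'WS'}

try:
    assert set(ignore) <= terminal_names, (ignore, terminal_names)
except AssertionError as e:
    print(e.args[0])  # (['WS', 'COMMENT'], {'WS'}) -- the context the revert removes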


lark/load_grammar.py (+3, -10)

@@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
-from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 
@@ -94,7 +94,6 @@ TERMINALS = {
     '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
     'NUMBER': r'[+-]?\d+',
-    '_END': r'\$',
 }
 
 RULES = {
@@ -131,8 +130,7 @@ RULES = {
               'nonterminal',
               'literal',
               'range',
-              'template_usage',
-              'end'],
+              'template_usage'],
 
     'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],
@@ -141,7 +139,6 @@ RULES = {
 
     'maybe': ['_LBRA expansions _RBRA'],
     'range': ['STRING _DOTDOT STRING'],
-    'end': ['_END'],
 
     'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
     '_template_args': ['value',
@@ -302,9 +299,6 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]
 
-    def end(self):
-        return Token('TERMINAL', END)
-
 class PrepareAnonTerminals(Transformer_InPlace):
     "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
 
@@ -813,7 +807,6 @@ class GrammarLoader:
 
         term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
         term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
-        term_defs.append((END, (None, 0)))
         rule_defs = [options_from_rule(*x) for x in rule_defs]
 
         # Execute statements
@@ -906,7 +899,7 @@ class GrammarLoader:
                 raise GrammarError("Terminal '%s' defined more than once" % name)
            terminal_names.add(name)
 
-        if set(ignore_names) - terminal_names:
+        if set(ignore_names) > terminal_names:
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
 
        resolve_term_references(term_defs)
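
Note that the last hunk also swaps the ignore-name sanity check from a set-difference test back to a strict-superset test, and the two are not equivalent. A quick sketch of how they diverge, with hypothetical names:

# COMMENT is marked %ignore but never defined as a terminal.
ignore_names = ['WS', 'COMMENT']
terminal_names = {'WS', 'NUMBER'}

print(set(ignore_names) > terminal_names)        # False: not a strict superset
print(bool(set(ignore_names) - terminal_names))  # True: {'COMMENT'} is undefined

So with the restored superset check, an undefined ignored terminal can slip through without raising the GrammarError.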


lark/parsers/grammar_analysis.py (+2, -2)

@@ -2,7 +2,7 @@ from collections import Counter, defaultdict
 
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
-from ..grammar import Rule, Terminal, NonTerminal, END
+from ..grammar import Rule, Terminal, NonTerminal
 
 
 class RulePtr(object):
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug
 
-        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)])
+        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                       for start in parser_conf.start}
 
         rules = parser_conf.rules + list(root_rules.values())
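
For context: each start symbol S gets a synthetic root rule $root_S -> S $END, so the automaton has a single accept item whose lookahead is end-of-input. A minimal sketch using the classes imported above (assuming, as the hunk itself suggests, that Rule(origin, expansion) is a valid two-argument call in this version):

from lark.grammar import Rule, Terminal, NonTerminal

# $root_start -> start $END : accept once 'start' is reduced and
# end-of-input is the next symbol.
root = Rule(NonTerminal('$root_start'), [NonTerminal('start'), Terminal('$END')])
print(root.origin.name, [s.name for s in root.expansion])  # $root_start ['start', '$END']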


lark/parsers/lalr_analysis.py (+2, -2)

@@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
-from ..grammar import Rule, END
+from ..grammar import Rule
 
 ###{standalone
 
@@ -178,7 +178,7 @@ class LALR_Analyzer(GrammarAnalyzer):
         assert(len(root.kernel) == 1)
         for rp in root.kernel:
             assert(rp.index == 0)
-            self.directly_reads[(root, rp.next)] = set([ Terminal(END) ])
+            self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
 
         for state in self.lr0_states:
             seen = set()
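
Here directly_reads seeds the DeRemer-Pennello lookahead computation: it maps a (state, symbol) transition to the terminals that can immediately follow it, and the root transition is primed with end-of-input. A toy stand-in with plain strings instead of lark's state and Terminal objects:

# Toy sketch, not lark's API: the root transition directly reads '$END',
# so lookahead sets ultimately derive end-of-input for the accepting reduce.
directly_reads = {}
directly_reads[('I0', 'start')] = {'$END'}
print(directly_reads)  # {('I0', 'start'): {'$END'}}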


lark/parsers/lalr_parser.py (+5, -10)

@@ -5,7 +5,6 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..utils import Enumerator, Serialize
-from ..grammar import END
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
 
@@ -106,16 +105,12 @@ class _Parser:
 
             raise
 
-        token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
+        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
         while True:
            _action, arg = get_action(token)
-            if _action is Shift:
-                state_stack.append(arg)
-                value_stack.append(token)
-            else:
-                assert(_action is Reduce)
-                reduce(arg)
-                if state_stack[-1] == end_state:
-                    return value_stack[-1]
+            assert(_action is Reduce)
+            reduce(arg)
+            if state_stack[-1] == end_state:
+                return value_stack[-1]
 
 ###}
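
Two things change in this hunk: with the end symbol gone from the grammar, '$END' can never be shifted, so only the Reduce branch survives; and the synthetic token's value goes from None back to the empty string (Token subclasses str in lark). A sketch of how Token.new_borrow_pos, used above, builds the end-of-input token while keeping the last real token's position, so errors still point at the end of the actual input (hypothetical token values):

from lark.lexer import Token

last = Token('NAME', 'foo', 42, 3, 7)  # type, value, pos_in_stream, line, column
eof = Token.new_borrow_pos('$END', '', last)
print(eof.type, eof.line, eof.column)  # expected: $END 3 7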

setup.py (+1, -1)

@@ -6,7 +6,7 @@ __version__ ,= re.findall('__version__ = "(.*)"', open('lark/__init__.py').read(
 setup(
     name = "lark-parser",
     version = __version__,
-    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark.__pyinstaller', 'lark-stubs'],
+    packages = ['lark', 'lark.parsers', 'lark.tools', 'lark.grammars', 'lark-stubs'],
 
     requires = [],
     install_requires = [],


tests/test_parser.py (+0, -36)

@@ -1737,42 +1737,6 @@ def _make_parser_test(LEXER, PARSER):
         """
         parser = _Lark(grammar)
 
-    @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-    def test_end_symbol(self):
-        grammar = """
-        start: a b?
-        a: "a" $
-        b: "b"
-        """
-        parser = _Lark(grammar)
-
-        self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
-        self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-    @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-    def test_end_symbol2(self):
-        grammar = """
-        start: (a|b)+
-        a: "a" ("x"|$)
-        b: "b"
-        """
-        parser = _Lark(grammar)
-
-        self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [])]))
-        self.assertRaises(UnexpectedInput, parser.parse, 'ab')
-
-    @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
-    def test_end_symbol3(self):
-        grammar = """
-        start: (a|b)+
-        a: "a" (e|"x")
-        b: "b"
-        e: $
-        """
-        parser = _Lark(grammar)
-
-        self.assertEqual(parser.parse('axa'), Tree('start', [Tree('a', []),Tree('a', [Tree('e', [])])]))
-        self.assertRaises(UnexpectedInput, parser.parse, 'ab')
 
     @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
     def test_serialize(self):

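The deleted tests document the feature being reverted: a $ symbol that matched end-of-input inside a rule, supported for LALR only. After this revert the syntax is gone again and end-of-input is only implicit after the start rule. A minimal sketch of the surviving behaviour (hypothetical grammar, not from the test suite):

from lark import Lark

# No $ symbol available inside rules; end-of-input is implicit after 'start'.
parser = Lark('''
    start: "a" "b"?
''', parser='lalr')

print(parser.parse('a'))   # a 'start' tree with no children (anonymous tokens are filtered)
print(parser.parse('ab'))  # likewise accepted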
