
Merge branch 'end_symbol' into end_symbol3

Erez Sh, 5 years ago
commit 463a5f2e3f
7 changed files with 37 additions and 8 deletions
  1. lark/grammar.py (+1, -0)
  2. lark/lexer.py (+1, -1)
  3. lark/load_grammar.py (+18, -3)
  4. lark/parsers/grammar_analysis.py (+2, -2)
  5. lark/parsers/lalr_analysis.py (+1, -1)
  6. lark/parsers/lalr_parser.py (+2, -1)
  7. tests/test_parser.py (+12, -0)

lark/grammar.py (+1, -0)

@@ -1,6 +1,7 @@
 from .utils import Serialize
 
 ###{standalone
+END = '_END$'
 
 class Symbol(Serialize):
     __slots__ = ('name',)


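For context: END is a reserved sentinel terminal name, and the '$' in '_END$' is what makes it safe to reserve, since terminal names written in a user grammar can never contain '$'. A tiny check, assuming this branch is installed (the constant does not exist in mainline lark):

    from lark.grammar import END  # branch-only constant

    assert END == '_END$'         # not a spellable name in user grammars
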
lark/lexer.py (+1, -1)

@@ -309,7 +309,7 @@ class TraditionalLexer(Lexer):
             if t.pattern.min_width == 0:
                 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
 
-        assert set(ignore) <= {t.name for t in terminals}
+        assert set(ignore) <= {t.name for t in terminals}, (ignore, terminals)
 
         # Init
         self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())]

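The lexer change is purely diagnostic: the assert gains (ignore, terminals) as its message, so a failure now reports which %ignore names are missing instead of raising a bare AssertionError. A simplified plain-Python illustration of the difference:

    # The second operand of assert becomes the AssertionError payload.
    ignore, terminal_names = {'WS'}, set()
    try:
        assert set(ignore) <= terminal_names, (ignore, terminal_names)
    except AssertionError as e:
        print(e)   # ({'WS'}, set())
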

lark/load_grammar.py (+18, -3)

@@ -11,7 +11,7 @@ from .lexer import Token, TerminalDef, PatternStr, PatternRE
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
-from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
+from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, END
 from .utils import classify, suppress, dedup_list, Str
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 
@@ -91,7 +91,12 @@ TERMINALS = {
     '_IGNORE': r'%ignore',
     '_DECLARE': r'%declare',
     '_IMPORT': r'%import',
+<<<<<<< HEAD
     'NUMBER': r'[+-]?\d+',
+=======
+    'NUMBER': r'\d+',
+    '_END': r'\$',
+>>>>>>> end_symbol
 }
 
 RULES = {
@@ -123,7 +128,8 @@ RULES = {
     'value': ['terminal',
               'nonterminal',
               'literal',
-              'range'],
+              'range',
+              'end'],
 
     'terminal': ['TERMINAL'],
     'nonterminal': ['RULE'],
@@ -131,7 +137,12 @@ RULES = {
     '?name': ['RULE', 'TERMINAL'],
 
     'maybe': ['_LBRA expansions _RBRA'],
+<<<<<<< HEAD
     'range': ['STRING _DOTDOT STRING'],
+=======
+    'range': ['STRING _DOT _DOT STRING'],
+    'end': ['_END'],
+>>>>>>> end_symbol
 
     'term': ['TERMINAL _COLON expansions _NL',
              'TERMINAL _DOT NUMBER _COLON expansions _NL'],
@@ -286,6 +297,9 @@ class CanonizeTree(Transformer_InPlace):
         tokenmods, value = args
         return tokenmods + [value]
 
+    def end(self):
+        return Token('TERMINAL', END)
+
 class PrepareAnonTerminals(Transformer_InPlace):
     "Create a unique list of anonymous terminals. Attempt to give meaningful names to them when we add them"
 
@@ -733,6 +747,7 @@ class GrammarLoader:
 
         term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
         term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
+        term_defs.append((END, (None, 0)))
         rule_defs = [options_from_rule(*x) for x in rule_defs]
 
         # Execute statements
@@ -825,7 +840,7 @@ class GrammarLoader:
                 raise GrammarError("Terminal '%s' defined more than once" % name)
             terminal_names.add(name)
 
-        if set(ignore_names) > terminal_names:
+        if set(ignore_names) - terminal_names:
             raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(ignore_names) - terminal_names))
 
         resolve_term_references(term_defs)

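Note that this merge was committed with unresolved conflict markers: the <<<<<<< HEAD / >>>>>>> end_symbol blocks above are literal file content, with HEAD's signed NUMBER pattern and _DOTDOT range syntax conflicting against the end_symbol branch. Conflicts aside, the intended flow is: the _END terminal lexes a literal '$', the new 'end' alternative makes it a valid value inside an expansion, CanonizeTree.end canonizes it into a token naming the reserved END terminal, and the loader registers END as a terminal with no definition and priority 0 so later validation accepts it. A minimal sketch of the canonized form, assuming this branch is installed (illustrative, not the loader's real call sequence):

    from lark.lexer import Token
    from lark.grammar import END  # branch-only

    # A '$' in a user grammar becomes a token that names the reserved
    # END terminal, as if the author had referenced it explicitly.
    tok = Token('TERMINAL', END)
    assert tok == '_END$'   # Token subclasses str, so it compares by value
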

lark/parsers/grammar_analysis.py (+2, -2)

@@ -2,7 +2,7 @@ from collections import Counter, defaultdict
 
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
-from ..grammar import Rule, Terminal, NonTerminal
+from ..grammar import Rule, Terminal, NonTerminal, END
 
 
 class RulePtr(object):
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
     def __init__(self, parser_conf, debug=False):
         self.debug = debug
 
-        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
+        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal(END)])
                       for start in parser_conf.start}
 
         rules = parser_conf.rules + list(root_rules.values())

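This is where the sentinel becomes load-bearing: every start symbol is wrapped in an augmented root rule that requires END after it, replacing the old hard-coded '$END' name. A minimal sketch of the construction, assuming this branch ('start' stands in for the names in parser_conf.start):

    from lark.grammar import Rule, Terminal, NonTerminal, END  # END is branch-only

    # Augment each start symbol S as: $root_S : S <END>, so the parser
    # only accepts once the entire input has been consumed.
    start_symbols = ['start']
    root_rules = {s: Rule(NonTerminal('$root_' + s), [NonTerminal(s), Terminal(END)])
                  for s in start_symbols}
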

lark/parsers/lalr_analysis.py (+1, -1)

@@ -13,7 +13,7 @@ from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
-from ..grammar import Rule
+from ..grammar import Rule, END
 
 ###{standalone
 

lark/parsers/lalr_parser.py (+2, -1)

@@ -5,6 +5,7 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..utils import Enumerator, Serialize
+from ..grammar import END
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
 
@@ -93,7 +94,7 @@ class _Parser:
             else:
                 reduce(arg)
 
-        token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
+        token = Token.new_borrow_pos(END, None, token) if token else Token(END, None, 0, 1, 1)
         while True:
             _action, arg = get_action(token)
             assert(_action is Reduce)

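For context: when the token stream is exhausted, the LALR parser synthesizes an end-of-input token and keeps reducing until the start rule completes. The change swaps the hard-coded '$END' string for the shared END constant and uses None rather than '' as the token's value. A condensed sketch of that end-of-stream step (last_token is a hypothetical name for the final token seen, if any):

    from lark.lexer import Token
    from lark.grammar import END  # branch-only

    def end_of_input_token(last_token=None):
        # Borrow position info from the last real token when one exists;
        # otherwise fabricate a position at the start of the input.
        if last_token is not None:
            return Token.new_borrow_pos(END, None, last_token)
        return Token(END, None, 0, 1, 1)
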

tests/test_parser.py (+12, -0)

@@ -1648,6 +1648,18 @@ def _make_parser_test(LEXER, PARSER):
             """
             parser = _Lark(grammar)
 
+        @unittest.skipIf(PARSER!='lalr', "Using the end symbol currently works for LALR only")
+        def test_end_symbol(self):
+            grammar = """
+            start: a b?
+            a: "a" $
+            b: "b"
+            """
+            parser = _Lark(grammar)
+
+            self.assertEqual(parser.parse('a'), Tree('start', [Tree('a', [])]))
+            self.assertRaises(UnexpectedInput, parser.parse, 'ab')
+
 
         @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):

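The new test doubles as a usage example. A standalone version, assuming this branch is installed (the '$' end-of-input syntax does not exist in mainline lark):

    from lark import Lark
    from lark.exceptions import UnexpectedInput

    # 'a' is anchored to the end of input by '$', so 'b' can never follow it.
    parser = Lark('''
        start: a b?
        a: "a" $
        b: "b"
    ''', parser='lalr')

    print(parser.parse('a'))   # Tree('start', [Tree('a', [])])
    try:
        parser.parse('ab')
    except UnexpectedInput:
        print('"ab" rejected, as expected')
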
