Browse Source

Work in progress. Not promising.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.7.2
Erez Shinan 5 years ago
parent
commit
f1e844accd
11 changed files with 35 additions and 19 deletions
  1. +1
    -0
      lark/common.py
  2. +3
    -1
      lark/exceptions.py
  3. +3
    -0
      lark/lark.py
  4. +3
    -1
      lark/lexer.py
  5. +3
    -2
      lark/load_grammar.py
  6. +1
    -1
      lark/parsers/cyk.py
  7. +1
    -1
      lark/parsers/earley.py
  8. +1
    -1
      lark/parsers/grammar_analysis.py
  9. +7
    -9
      lark/parsers/lalr_analysis.py
  10. +3
    -3
      lark/parsers/lalr_parser.py
  11. +9
    -0
      tests/test_parser.py

+ 1
- 0
lark/common.py View File

@@ -20,6 +20,7 @@ class LexerConf(Serialize):


class ParserConf: class ParserConf:
def __init__(self, rules, callbacks, start): def __init__(self, rules, callbacks, start):
assert isinstance(start, list)
self.rules = rules self.rules = rules
self.callbacks = callbacks self.callbacks = callbacks
self.start = start self.start = start


+ 3
- 1
lark/exceptions.py View File

@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError):




class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)


self.line = line self.line = line
@@ -65,6 +65,8 @@ class UnexpectedCharacters(LexError, UnexpectedInput):
message += '\n\n' + self.get_context(seq) message += '\n\n' + self.get_context(seq)
if allowed: if allowed:
message += '\nExpecting: %s\n' % allowed message += '\nExpecting: %s\n' % allowed
if token_history:
message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history)


super(UnexpectedCharacters, self).__init__(message) super(UnexpectedCharacters, self).__init__(message)




+ 3
- 0
lark/lark.py View File

@@ -85,6 +85,9 @@ class LarkOptions(Serialize):


options[name] = value options[name] = value


if isinstance(options['start'], str):
options['start'] = [options['start']]

self.__dict__['options'] = options self.__dict__['options'] = options


assert self.parser in ('earley', 'lalr', 'cyk', None) assert self.parser in ('earley', 'lalr', 'cyk', None)


+ 3
- 1
lark/lexer.py View File

@@ -149,6 +149,7 @@ class _Lex:
newline_types = frozenset(newline_types) newline_types = frozenset(newline_types)
ignore_types = frozenset(ignore_types) ignore_types = frozenset(ignore_types)
line_ctr = LineCounter() line_ctr = LineCounter()
last_token = None


while line_ctr.char_pos < len(stream): while line_ctr.char_pos < len(stream):
lexer = self.lexer lexer = self.lexer
@@ -166,6 +167,7 @@ class _Lex:
t = lexer.callback[t.type](t) t = lexer.callback[t.type](t)
if not isinstance(t, Token): if not isinstance(t, Token):
raise ValueError("Callbacks must return a token (returned %r)" % t) raise ValueError("Callbacks must return a token (returned %r)" % t)
last_token = t
yield t yield t
else: else:
if type_ in lexer.callback: if type_ in lexer.callback:
@@ -180,7 +182,7 @@ class _Lex:
break break
else: else:
allowed = {v for m, tfi in lexer.mres for v in tfi.values()} allowed = {v for m, tfi in lexer.mres for v in tfi.values()}
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])




class UnlessCallback: class UnlessCallback:


+ 3
- 2
lark/load_grammar.py View File

@@ -554,7 +554,8 @@ class Grammar:
for s in r.expansion for s in r.expansion
if isinstance(s, NonTerminal) if isinstance(s, NonTerminal)
and s != r.origin} and s != r.origin}
compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
used_rules |= {NonTerminal(s) for s in start}
compiled_rules = [r for r in compiled_rules if r.origin in used_rules]
if len(compiled_rules) == c: if len(compiled_rules) == c:
break break


@@ -690,7 +691,7 @@ class GrammarLoader:
callback = ParseTreeBuilder(rules, ST).create_callback() callback = ParseTreeBuilder(rules, ST).create_callback()
lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) lexer_conf = LexerConf(terminals, ['WS', 'COMMENT'])


parser_conf = ParserConf(rules, callback, 'start')
parser_conf = ParserConf(rules, callback, ['start'])
self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf)


self.canonize_tree = CanonizeTree() self.canonize_tree = CanonizeTree()


+ 1
- 1
lark/parsers/cyk.py View File

@@ -89,7 +89,7 @@ class Parser(object):
self.orig_rules = {rule: rule for rule in rules} self.orig_rules = {rule: rule for rule in rules}
rules = [self._to_rule(rule) for rule in rules] rules = [self._to_rule(rule) for rule in rules]
self.grammar = to_cnf(Grammar(rules)) self.grammar = to_cnf(Grammar(rules))
self.start = NT(start)
self.start = NT(start[0])


def _to_rule(self, lark_rule): def _to_rule(self, lark_rule):
"""Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""


+ 1
- 1
lark/parsers/earley.py View File

@@ -274,7 +274,7 @@ class Parser:
assert i == len(columns)-1 assert i == len(columns)-1


def parse(self, stream, start_symbol=None): def parse(self, stream, start_symbol=None):
start_symbol = NonTerminal(start_symbol or self.parser_conf.start)
start_symbol = NonTerminal(start_symbol or self.parser_conf.start[0])


columns = [set()] columns = [set()]
to_scan = set() # The scan buffer. 'Q' in E.Scott's paper. to_scan = set() # The scan buffer. 'Q' in E.Scott's paper.


+ 1
- 1
lark/parsers/grammar_analysis.py View File

@@ -109,7 +109,7 @@ class GrammarAnalyzer(object):
def __init__(self, parser_conf, debug=False): def __init__(self, parser_conf, debug=False):
self.debug = debug self.debug = debug


rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])]
rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(s), Terminal('$END')]) for s in parser_conf.start]
self.rules_by_origin = classify(rules, lambda r: r.origin) self.rules_by_origin = classify(rules, lambda r: r.origin)


if len(rules) != len(set(rules)): if len(rules) != len(set(rules)):


+ 7
- 9
lark/parsers/lalr_analysis.py View File

@@ -29,10 +29,10 @@ Shift = Action('Shift')
Reduce = Action('Reduce') Reduce = Action('Reduce')


class ParseTable: class ParseTable:
def __init__(self, states, start_state, end_state):
def __init__(self, states, start_state, end_states):
self.states = states self.states = states
self.start_state = start_state self.start_state = start_state
self.end_state = end_state
self.end_states = end_states


def serialize(self, memo): def serialize(self, memo):
tokens = Enumerator() tokens = Enumerator()
@@ -48,7 +48,7 @@ class ParseTable:
'tokens': tokens.reversed(), 'tokens': tokens.reversed(),
'states': states, 'states': states,
'start_state': self.start_state, 'start_state': self.start_state,
'end_state': self.end_state,
'end_states': self.end_states,
} }


@classmethod @classmethod
@@ -59,7 +59,7 @@ class ParseTable:
for token, (action, arg) in actions.items()} for token, (action, arg) in actions.items()}
for state, actions in data['states'].items() for state, actions in data['states'].items()
} }
return cls(states, data['start_state'], data['end_state'])
return cls(states, data['start_state'], data['end_states'])




class IntParseTable(ParseTable): class IntParseTable(ParseTable):
@@ -77,8 +77,8 @@ class IntParseTable(ParseTable):




start_state = state_to_idx[parse_table.start_state] start_state = state_to_idx[parse_table.start_state]
end_state = state_to_idx[parse_table.end_state]
return cls(int_states, start_state, end_state)
end_states = [state_to_idx[s] for s in parse_table.end_states]
return cls(int_states, start_state, end_states)


###} ###}


@@ -130,9 +130,7 @@ class LALR_Analyzer(GrammarAnalyzer):
for _ in bfs([self.start_state], step): for _ in bfs([self.start_state], step):
pass pass


self.end_state ,= self.end_states

self._parse_table = ParseTable(self.states, self.start_state, self.end_state)
self._parse_table = ParseTable(self.states, self.start_state, self.end_states)


if self.debug: if self.debug:
self.parse_table = self._parse_table self.parse_table = self._parse_table


+ 3
- 3
lark/parsers/lalr_parser.py View File

@@ -40,7 +40,7 @@ class _Parser:
def __init__(self, parse_table, callbacks): def __init__(self, parse_table, callbacks):
self.states = parse_table.states self.states = parse_table.states
self.start_state = parse_table.start_state self.start_state = parse_table.start_state
self.end_state = parse_table.end_state
self.end_states = parse_table.end_states
self.callbacks = callbacks self.callbacks = callbacks


def parse(self, seq, set_state=None): def parse(self, seq, set_state=None):
@@ -81,7 +81,7 @@ class _Parser:
for token in stream: for token in stream:
while True: while True:
action, arg = get_action(token) action, arg = get_action(token)
assert arg != self.end_state
assert arg not in self.end_states


if action is Shift: if action is Shift:
state_stack.append(arg) state_stack.append(arg)
@@ -95,7 +95,7 @@ class _Parser:
while True: while True:
_action, arg = get_action(token) _action, arg = get_action(token)
if _action is Shift: if _action is Shift:
assert arg == self.end_state
assert arg in self.end_states
val ,= value_stack val ,= value_stack
return val return val
else: else:


+ 9
- 0
tests/test_parser.py View File

@@ -1523,6 +1523,15 @@ def _make_parser_test(LEXER, PARSER):
parser3 = Lark.deserialize(d, namespace, m) parser3 = Lark.deserialize(d, namespace, m)
self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) ) self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )


def test_multi_start(self):
parser = _Lark('''
a: "x"
b: "x" "b"?
''', start=['a', 'b'])

# parser.parse('acab')
# parser.parse('bcab')





_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


Loading…
Cancel
Save