
Fixed propagate positions. Added lexer_callbacks option.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.4
Erez Shinan, 8 years ago · commit 22e525f53e
6 changed files with 33 additions and 17 deletions

  1. lark/common.py              +2  -1
  2. lark/lark.py                +5  -3
  3. lark/lexer.py               +16 -4
  4. lark/parse_tree_builder.py  +1  -1
  5. lark/parser_frontends.py    +2  -2
  6. tests/test_parser.py        +7  -6
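
The headline feature in use, as a minimal sketch against the API wired up in the diffs below. The grammar and the INT terminal are illustrative assumptions, not part of this commit; the two-argument Token constructor and Token.value follow the Token usage visible in lark/lexer.py:

    from lark import Lark, Token

    # Illustrative grammar (not from this commit).
    grammar = r"""
    start: INT+
    INT: /[0-9]+/
    WS: /[ \t\n]+/
    %ignore WS
    """

    def double(tok):
        # A lexer callback receives each Token of its type; its return value
        # replaces the token in the stream ("may alter tokens during lexing").
        return Token(tok.type, str(int(tok.value) * 2))

    parser = Lark(grammar, parser='lalr', lexer_callbacks={'INT': double})
    print(parser.parse("1 2 3").children)   # INT tokens arrive as '2', '4', '6'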

lark/common.py (+2 -1)

@@ -41,10 +41,11 @@ class UnexpectedToken(ParseError):

 class LexerConf:
-    def __init__(self, tokens, ignore=(), postlex=None):
+    def __init__(self, tokens, ignore=(), postlex=None, callbacks={}):
         self.tokens = tokens
         self.ignore = ignore
         self.postlex = postlex
+        self.callbacks = callbacks


 class ParserConf:
     def __init__(self, rules, callback, start):


lark/lark.py (+5 -3)

@@ -39,7 +39,8 @@ class LarkOptions(object):
         postlex - Lexer post-processing (Requires standard lexer. Default: None)
         start - The start symbol (Default: start)
         profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
-        propagate_positions - Experimental. Don't use yet.
+        propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
+        lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
     """
     __doc__ += OPTIONS_DOC
     def __init__(self, options_dict):
@@ -58,6 +59,7 @@ class LarkOptions(object):
         self.ambiguity = o.pop('ambiguity', 'auto')
         self.propagate_positions = o.pop('propagate_positions', False)
         self.earley__predict_all = o.pop('earley__predict_all', False)
+        self.lexer_callbacks = o.pop('lexer_callbacks', {})

         assert self.parser in ('earley', 'lalr', 'cyk', None)
@@ -153,7 +155,7 @@ class Lark:
         # Compile the EBNF grammar into BNF
         tokens, self.rules, self.ignore_tokens = self.grammar.compile(lexer=bool(lexer), start=self.options.start)

-        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)
+        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

         if self.options.parser:
             self.parser = self._build_parser()
@@ -165,7 +167,7 @@ class Lark:
     __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

     def _build_lexer(self):
-        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore)
+        return Lexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)

     def _build_parser(self):
         self.parser_class = get_frontend(self.options.parser, self.options.lexer)

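Taken together with the lexer change below, the fixed propagate_positions option now yields usable end positions. A hedged sketch; the toy grammar is assumed, and the printed values depend on the lexer in use:

    from lark import Lark

    # Toy grammar, assumed for illustration.
    parser = Lark('start: A+\nA: /[ab]/', parser='lalr',
                  propagate_positions=True)
    tree = parser.parse("ab")

    # The branch takes its start from the first positioned child and its
    # end from the last one; the tree builder now reads `end_column`,
    # matching what the lexer actually sets (it previously looked for
    # a misspelled `end_col`).
    print(tree.line, tree.column, tree.end_line, tree.end_column)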

lark/lexer.py (+16 -4)

@@ -82,6 +82,7 @@ class _Lex:
         ignore_types = list(ignore_types)
         line_ctr = LineCounter()

+        t = None
         while True:
             lexer = self.lexer
             for mre, type_from_index in lexer.mres:
@@ -94,8 +95,15 @@ class _Lex:
                         if t.type in lexer.callback:
                             t = lexer.callback[t.type](t)
                         yield t
+                    else:
+                        if type_ in lexer.callback:
+                            t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                            lexer.callback[type_](t)

                     line_ctr.feed(value, type_ in newline_types)
+                    if t:
+                        t.end_line = line_ctr.line
+                        t.end_column = line_ctr.column
                     break
             else:
                 if line_ctr.char_pos < len(stream):
@@ -163,7 +171,7 @@ def _regexp_has_newline(r):
     return '\n' in r or '\\n' in r or ('(?s)' in r and '.' in r)

 class Lexer:
-    def __init__(self, tokens, ignore=()):
+    def __init__(self, tokens, ignore=(), user_callbacks={}):
         assert all(isinstance(t, TokenDef) for t in tokens), tokens

         tokens = list(tokens)
@@ -189,6 +197,10 @@ class Lexer:
         tokens, self.callback = _create_unless(tokens)
         assert all(self.callback.values())

+        for type_, f in user_callbacks.items():
+            assert type_ not in self.callback
+            self.callback[type_] = f
+
         self.tokens = tokens

         self.mres = build_mres(tokens)
@@ -198,7 +210,7 @@ class Lexer:

 class ContextualLexer:
-    def __init__(self, tokens, states, ignore=(), always_accept=()):
+    def __init__(self, tokens, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
         for t in tokens:
             assert t.name not in tokens_by_name, t
@@ -213,12 +225,12 @@ class ContextualLexer:
             except KeyError:
                 accepts = set(accepts) | set(ignore) | set(always_accept)
                 state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
-                lexer = Lexer(state_tokens, ignore=ignore)
+                lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                 lexer_by_tokens[key] = lexer

             self.lexers[state] = lexer

-        self.root_lexer = Lexer(tokens, ignore=ignore)
+        self.root_lexer = Lexer(tokens, ignore=ignore, user_callbacks=user_callbacks)

         self.set_parser_state(None) # Needs to be set on the outside



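Note the new `else` branch in `_Lex.lex`: callbacks now also fire for ignored terminals, although there the return value is discarded, since nothing is yielded. One plausible use, with an assumed toy grammar, is collecting comments that the parser itself never sees:

    from lark import Lark

    # Assumed toy grammar: COMMENT is lexed but never reaches the parser.
    grammar = r"""
    start: INT+
    INT: /[0-9]+/
    COMMENT: /#[^\n]*/
    WS: /[ \t\n]+/
    %ignore COMMENT
    %ignore WS
    """

    comments = []
    parser = Lark(grammar, parser='lalr',
                  lexer_callbacks={'COMMENT': comments.append})
    parser.parse("1 # one\n2 # two\n")

    # list.append returns None, which is harmless here precisely because
    # the return value of callbacks on ignored terminals is discarded.
    print(comments)   # both comment tokens were observed during lexing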

lark/parse_tree_builder.py (+1 -1)

@@ -92,7 +92,7 @@ class PropagatePositions:
             for a in reversed(children):
                 with suppress(AttributeError):
                     res.end_line = a.end_line
-                    res.end_col = a.end_col
+                    res.end_column = a.end_column
                     break

             return res

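The loop above is a "last positioned child wins" scan; isolated as a sketch (the helper name is mine, for illustration):

    from contextlib import suppress

    def last_end_position(children):
        # Scan from the right for a child that carries end positions;
        # children without them raise AttributeError, which suppress()
        # swallows. The old code read `end_col`, an attribute nothing
        # ever set, so end columns silently failed to propagate.
        for a in reversed(children):
            with suppress(AttributeError):
                return a.end_line, a.end_column
        return None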

lark/parser_frontends.py (+2 -2)

@@ -11,13 +11,13 @@ from .tree import Tree
 class WithLexer:
     def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
-        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)
+        self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)

     def init_contextual_lexer(self, lexer_conf, parser_conf):
         self.lexer_conf = lexer_conf
         d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
         always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)

     def lex(self, text):
         stream = self.lexer.lex(text)


tests/test_parser.py (+7 -6)

@@ -839,12 +839,13 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(d.line, 2)
         self.assertEqual(d.column, 1)

-        # self.assertEqual(a.end_line, 1)
-        # self.assertEqual(a.end_col, 1)
-        # self.assertEqual(bc.end_line, 2)
-        # self.assertEqual(bc.end_col, 1)
-        # self.assertEqual(d.end_line, 2)
-        # self.assertEqual(d.end_col, 2)
+        if LEXER != 'dynamic':
+            self.assertEqual(a.end_line, 1)
+            self.assertEqual(a.end_column, 1)
+            self.assertEqual(bc.end_line, 2)
+            self.assertEqual(bc.end_column, 1)
+            self.assertEqual(d.end_line, 2)
+            self.assertEqual(d.end_column, 2)







