Browse Source

Change how propagate_positions work

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Sh 3 years ago
parent
commit
3bc070bc1d
5 changed files with 42 additions and 50 deletions
  1. +2
    -2
      lark-stubs/lark.pyi
  2. +2
    -2
      lark/lark.py
  3. +14
    -14
      lark/lexer.py
  4. +16
    -24
      lark/parse_tree_builder.py
  5. +8
    -8
      lark/parser_frontends.py

+ 2
- 2
lark-stubs/lark.pyi View File

@@ -33,7 +33,7 @@ class LarkOptions:
regex: bool
debug: bool
keep_all_tokens: bool
propagate_positions: Union[bool, str]
propagate_positions: Union[bool, Callable]
maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str]
@@ -77,7 +77,7 @@ class Lark:
regex: bool = False,
debug: bool = False,
keep_all_tokens: bool = False,
propagate_positions: Union[bool, str] = False,
propagate_positions: Union[bool, Callable] = False,
maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False,


+ 2
- 2
lark/lark.py View File

@@ -44,7 +44,7 @@ class LarkOptions(Serialize):
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees.
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
maybe_placeholders
When ``True``, the ``[]`` operator returns ``None`` when not matched.

@@ -162,7 +162,7 @@ class LarkOptions(Serialize):
assert_config(self.parser, ('earley', 'lalr', 'cyk', None))

if self.parser == 'earley' and self.transformer:
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')

if o:


+ 14
- 14
lark/lexer.py View File

@@ -133,20 +133,20 @@ class Token(Str):

def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
try:
self = super(Token, cls).__new__(cls, value)
inst = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError:
value = value.decode('latin1')
self = super(Token, cls).__new__(cls, value)
self.type = type_
self.start_pos = start_pos if start_pos is not None else pos_in_stream
self.value = value
self.line = line
self.column = column
self.end_line = end_line
self.end_column = end_column
self.end_pos = end_pos
return self
inst = super(Token, cls).__new__(cls, value)
inst.type = type_
inst.start_pos = start_pos if start_pos is not None else pos_in_stream
inst.value = value
inst.line = line
inst.column = column
inst.end_line = end_line
inst.end_column = end_column
inst.end_pos = end_pos
return inst

@property
def pos_in_stream(self):
@@ -258,8 +258,8 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes):
if unless:
callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))

terminals = [t for t in terminals if t not in embedded_strs]
return terminals, callback
new_terminals = [t for t in terminals if t not in embedded_strs]
return new_terminals, callback





+ 16
- 24
lark/parse_tree_builder.py View File

@@ -23,8 +23,9 @@ class ExpandSingleChild:


class PropagatePositions:
def __init__(self, node_builder):
def __init__(self, node_builder, node_filter=None):
self.node_builder = node_builder
self.node_filter = node_filter

def __call__(self, children):
res = self.node_builder(children)
@@ -33,44 +34,35 @@ class PropagatePositions:
if isinstance(res, Tree):
res_meta = res.meta

src_meta = self._pp_get_meta(children)
if src_meta is not None:
res_meta.line = src_meta.line
res_meta.column = src_meta.column
res_meta.start_pos = src_meta.start_pos
first_meta = self._pp_get_meta(children)
if first_meta is not None:
res_meta.line = first_meta.line
res_meta.column = first_meta.column
res_meta.start_pos = first_meta.start_pos
res_meta.empty = False

src_meta = self._pp_get_meta(reversed(children))
if src_meta is not None:
res_meta.end_line = src_meta.end_line
res_meta.end_column = src_meta.end_column
res_meta.end_pos = src_meta.end_pos
last_meta = self._pp_get_meta(reversed(children))
if last_meta is not None:
res_meta.end_line = last_meta.end_line
res_meta.end_column = last_meta.end_column
res_meta.end_pos = last_meta.end_pos
res_meta.empty = False

return res

def _pp_get_meta(self, children):
for c in children:
if self.node_filter is not None and not self.node_filter(c):
continue
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
return c

class PropagatePositions_IgnoreWs(PropagatePositions):
def _pp_get_meta(self, children):
for c in children:
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
if c and not c.isspace(): # Disregard whitespace-only tokens
return c


def make_propagate_positions(option):
if option == "ignore_ws":
return PropagatePositions_IgnoreWs
if callable(option):
return partial(PropagatePositions, node_filter=option)
elif option is True:
return PropagatePositions
elif option is False:


+ 8
- 8
lark/parser_frontends.py View File

@@ -92,26 +92,26 @@ class ParsingFrontend(Serialize):
def _verify_start(self, start=None):
if start is None:
start = self.parser_conf.start
if len(start) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
start ,= start
start_decls = self.parser_conf.start
if len(start_decls) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
start ,= start_decls
elif start not in self.parser_conf.start:
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
return start

def parse(self, text, start=None, on_error=None):
start = self._verify_start(start)
chosen_start = self._verify_start(start)
stream = text if self.skip_lexer else LexerThread(self.lexer, text)
kw = {} if on_error is None else {'on_error': on_error}
return self.parser.parse(stream, start, **kw)
return self.parser.parse(stream, chosen_start, **kw)
def parse_interactive(self, text=None, start=None):
start = self._verify_start(start)
chosen_start = self._verify_start(start)
if self.parser_conf.parser_type != 'lalr':
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
stream = text if self.skip_lexer else LexerThread(self.lexer, text)
return self.parser.parse_interactive(stream, start)
return self.parser.parse_interactive(stream, chosen_start)


def get_frontend(parser, lexer):


Loading…
Cancel
Save