Browse Source

Change how propagate_positions works

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Sh 3 years ago
parent
commit
3bc070bc1d
5 changed files with 42 additions and 50 deletions
  1. +2
    -2
      lark-stubs/lark.pyi
  2. +2
    -2
      lark/lark.py
  3. +14
    -14
      lark/lexer.py
  4. +16
    -24
      lark/parse_tree_builder.py
  5. +8
    -8
      lark/parser_frontends.py

+ 2
- 2
lark-stubs/lark.pyi View File

@@ -33,7 +33,7 @@ class LarkOptions:
regex: bool regex: bool
debug: bool debug: bool
keep_all_tokens: bool keep_all_tokens: bool
propagate_positions: Union[bool, str]
propagate_positions: Union[bool, Callable]
maybe_placeholders: bool maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]] lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str] cache: Union[bool, str]
@@ -77,7 +77,7 @@ class Lark:
regex: bool = False, regex: bool = False,
debug: bool = False, debug: bool = False,
keep_all_tokens: bool = False, keep_all_tokens: bool = False,
propagate_positions: Union[bool, str] = False,
propagate_positions: Union[bool, Callable] = False,
maybe_placeholders: bool = False, maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False, cache: Union[bool, str] = False,


+ 2
- 2
lark/lark.py View File

@@ -44,7 +44,7 @@ class LarkOptions(Serialize):
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches. Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees.
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
maybe_placeholders maybe_placeholders
When ``True``, the ``[]`` operator returns ``None`` when not matched. When ``True``, the ``[]`` operator returns ``None`` when not matched.


@@ -162,7 +162,7 @@ class LarkOptions(Serialize):
assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) assert_config(self.parser, ('earley', 'lalr', 'cyk', None))


if self.parser == 'earley' and self.transformer: if self.parser == 'earley' and self.transformer:
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.'
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')


if o: if o:


+ 14
- 14
lark/lexer.py View File

@@ -133,20 +133,20 @@ class Token(Str):


def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
try: try:
self = super(Token, cls).__new__(cls, value)
inst = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError: except UnicodeDecodeError:
value = value.decode('latin1') value = value.decode('latin1')
self = super(Token, cls).__new__(cls, value)
self.type = type_
self.start_pos = start_pos if start_pos is not None else pos_in_stream
self.value = value
self.line = line
self.column = column
self.end_line = end_line
self.end_column = end_column
self.end_pos = end_pos
return self
inst = super(Token, cls).__new__(cls, value)
inst.type = type_
inst.start_pos = start_pos if start_pos is not None else pos_in_stream
inst.value = value
inst.line = line
inst.column = column
inst.end_line = end_line
inst.end_column = end_column
inst.end_pos = end_pos
return inst


@property @property
def pos_in_stream(self): def pos_in_stream(self):
@@ -258,8 +258,8 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes):
if unless: if unless:
callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))


terminals = [t for t in terminals if t not in embedded_strs]
return terminals, callback
new_terminals = [t for t in terminals if t not in embedded_strs]
return new_terminals, callback








+ 16
- 24
lark/parse_tree_builder.py View File

@@ -23,8 +23,9 @@ class ExpandSingleChild:




class PropagatePositions: class PropagatePositions:
def __init__(self, node_builder):
def __init__(self, node_builder, node_filter=None):
self.node_builder = node_builder self.node_builder = node_builder
self.node_filter = node_filter


def __call__(self, children): def __call__(self, children):
res = self.node_builder(children) res = self.node_builder(children)
@@ -33,44 +34,35 @@ class PropagatePositions:
if isinstance(res, Tree): if isinstance(res, Tree):
res_meta = res.meta res_meta = res.meta


src_meta = self._pp_get_meta(children)
if src_meta is not None:
res_meta.line = src_meta.line
res_meta.column = src_meta.column
res_meta.start_pos = src_meta.start_pos
first_meta = self._pp_get_meta(children)
if first_meta is not None:
res_meta.line = first_meta.line
res_meta.column = first_meta.column
res_meta.start_pos = first_meta.start_pos
res_meta.empty = False res_meta.empty = False


src_meta = self._pp_get_meta(reversed(children))
if src_meta is not None:
res_meta.end_line = src_meta.end_line
res_meta.end_column = src_meta.end_column
res_meta.end_pos = src_meta.end_pos
last_meta = self._pp_get_meta(reversed(children))
if last_meta is not None:
res_meta.end_line = last_meta.end_line
res_meta.end_column = last_meta.end_column
res_meta.end_pos = last_meta.end_pos
res_meta.empty = False res_meta.empty = False


return res return res


def _pp_get_meta(self, children): def _pp_get_meta(self, children):
for c in children: for c in children:
if self.node_filter is not None and not self.node_filter(c):
continue
if isinstance(c, Tree): if isinstance(c, Tree):
if not c.meta.empty: if not c.meta.empty:
return c.meta return c.meta
elif isinstance(c, Token): elif isinstance(c, Token):
return c return c


class PropagatePositions_IgnoreWs(PropagatePositions):
def _pp_get_meta(self, children):
for c in children:
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
if c and not c.isspace(): # Disregard whitespace-only tokens
return c


def make_propagate_positions(option): def make_propagate_positions(option):
if option == "ignore_ws":
return PropagatePositions_IgnoreWs
if callable(option):
return partial(PropagatePositions, node_filter=option)
elif option is True: elif option is True:
return PropagatePositions return PropagatePositions
elif option is False: elif option is False:


+ 8
- 8
lark/parser_frontends.py View File

@@ -92,26 +92,26 @@ class ParsingFrontend(Serialize):
def _verify_start(self, start=None): def _verify_start(self, start=None):
if start is None: if start is None:
start = self.parser_conf.start
if len(start) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start)
start ,= start
start_decls = self.parser_conf.start
if len(start_decls) > 1:
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
start ,= start_decls
elif start not in self.parser_conf.start: elif start not in self.parser_conf.start:
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
return start return start


def parse(self, text, start=None, on_error=None): def parse(self, text, start=None, on_error=None):
start = self._verify_start(start)
chosen_start = self._verify_start(start)
stream = text if self.skip_lexer else LexerThread(self.lexer, text) stream = text if self.skip_lexer else LexerThread(self.lexer, text)
kw = {} if on_error is None else {'on_error': on_error} kw = {} if on_error is None else {'on_error': on_error}
return self.parser.parse(stream, start, **kw)
return self.parser.parse(stream, chosen_start, **kw)
def parse_interactive(self, text=None, start=None): def parse_interactive(self, text=None, start=None):
start = self._verify_start(start)
chosen_start = self._verify_start(start)
if self.parser_conf.parser_type != 'lalr': if self.parser_conf.parser_type != 'lalr':
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
stream = text if self.skip_lexer else LexerThread(self.lexer, text) stream = text if self.skip_lexer else LexerThread(self.lexer, text)
return self.parser.parse_interactive(stream, start)
return self.parser.parse_interactive(stream, chosen_start)




def get_frontend(parser, lexer): def get_frontend(parser, lexer):


Loading…
Cancel
Save