@@ -33,7 +33,7 @@ class LarkOptions: | |||||
regex: bool | regex: bool | ||||
debug: bool | debug: bool | ||||
keep_all_tokens: bool | keep_all_tokens: bool | ||||
propagate_positions: Union[bool, str] | |||||
propagate_positions: Union[bool, Callable] | |||||
maybe_placeholders: bool | maybe_placeholders: bool | ||||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | lexer_callbacks: Dict[str, Callable[[Token], Token]] | ||||
cache: Union[bool, str] | cache: Union[bool, str] | ||||
@@ -77,7 +77,7 @@ class Lark: | |||||
regex: bool = False, | regex: bool = False, | ||||
debug: bool = False, | debug: bool = False, | ||||
keep_all_tokens: bool = False, | keep_all_tokens: bool = False, | ||||
propagate_positions: Union[bool, str] = False, | |||||
propagate_positions: Union[bool, Callable] = False, | |||||
maybe_placeholders: bool = False, | maybe_placeholders: bool = False, | ||||
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | ||||
cache: Union[bool, str] = False, | cache: Union[bool, str] = False, | ||||
@@ -44,7 +44,7 @@ class LarkOptions(Serialize): | |||||
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | ||||
propagate_positions | propagate_positions | ||||
Propagates (line, column, end_line, end_column) attributes into all tree branches. | Propagates (line, column, end_line, end_column) attributes into all tree branches. | ||||
Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees. | |||||
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. | |||||
maybe_placeholders | maybe_placeholders | ||||
When ``True``, the ``[]`` operator returns ``None`` when not matched. | When ``True``, the ``[]`` operator returns ``None`` when not matched. | ||||
@@ -162,7 +162,7 @@ class LarkOptions(Serialize): | |||||
assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) | assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) | ||||
if self.parser == 'earley' and self.transformer: | if self.parser == 'earley' and self.transformer: | ||||
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||||
raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' | |||||
'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | ||||
if o: | if o: | ||||
@@ -133,20 +133,20 @@ class Token(Str): | |||||
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | ||||
try: | try: | ||||
self = super(Token, cls).__new__(cls, value) | |||||
inst = super(Token, cls).__new__(cls, value) | |||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
value = value.decode('latin1') | value = value.decode('latin1') | ||||
self = super(Token, cls).__new__(cls, value) | |||||
self.type = type_ | |||||
self.start_pos = start_pos if start_pos is not None else pos_in_stream | |||||
self.value = value | |||||
self.line = line | |||||
self.column = column | |||||
self.end_line = end_line | |||||
self.end_column = end_column | |||||
self.end_pos = end_pos | |||||
return self | |||||
inst = super(Token, cls).__new__(cls, value) | |||||
inst.type = type_ | |||||
inst.start_pos = start_pos if start_pos is not None else pos_in_stream | |||||
inst.value = value | |||||
inst.line = line | |||||
inst.column = column | |||||
inst.end_line = end_line | |||||
inst.end_column = end_column | |||||
inst.end_pos = end_pos | |||||
return inst | |||||
@property | @property | ||||
def pos_in_stream(self): | def pos_in_stream(self): | ||||
@@ -258,8 +258,8 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||||
if unless: | if unless: | ||||
callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) | callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) | ||||
terminals = [t for t in terminals if t not in embedded_strs] | |||||
return terminals, callback | |||||
new_terminals = [t for t in terminals if t not in embedded_strs] | |||||
return new_terminals, callback | |||||
@@ -23,8 +23,9 @@ class ExpandSingleChild: | |||||
class PropagatePositions: | class PropagatePositions: | ||||
def __init__(self, node_builder): | |||||
def __init__(self, node_builder, node_filter=None): | |||||
self.node_builder = node_builder | self.node_builder = node_builder | ||||
self.node_filter = node_filter | |||||
def __call__(self, children): | def __call__(self, children): | ||||
res = self.node_builder(children) | res = self.node_builder(children) | ||||
@@ -33,44 +34,35 @@ class PropagatePositions: | |||||
if isinstance(res, Tree): | if isinstance(res, Tree): | ||||
res_meta = res.meta | res_meta = res.meta | ||||
src_meta = self._pp_get_meta(children) | |||||
if src_meta is not None: | |||||
res_meta.line = src_meta.line | |||||
res_meta.column = src_meta.column | |||||
res_meta.start_pos = src_meta.start_pos | |||||
first_meta = self._pp_get_meta(children) | |||||
if first_meta is not None: | |||||
res_meta.line = first_meta.line | |||||
res_meta.column = first_meta.column | |||||
res_meta.start_pos = first_meta.start_pos | |||||
res_meta.empty = False | res_meta.empty = False | ||||
src_meta = self._pp_get_meta(reversed(children)) | |||||
if src_meta is not None: | |||||
res_meta.end_line = src_meta.end_line | |||||
res_meta.end_column = src_meta.end_column | |||||
res_meta.end_pos = src_meta.end_pos | |||||
last_meta = self._pp_get_meta(reversed(children)) | |||||
if last_meta is not None: | |||||
res_meta.end_line = last_meta.end_line | |||||
res_meta.end_column = last_meta.end_column | |||||
res_meta.end_pos = last_meta.end_pos | |||||
res_meta.empty = False | res_meta.empty = False | ||||
return res | return res | ||||
def _pp_get_meta(self, children): | def _pp_get_meta(self, children): | ||||
for c in children: | for c in children: | ||||
if self.node_filter is not None and not self.node_filter(c): | |||||
continue | |||||
if isinstance(c, Tree): | if isinstance(c, Tree): | ||||
if not c.meta.empty: | if not c.meta.empty: | ||||
return c.meta | return c.meta | ||||
elif isinstance(c, Token): | elif isinstance(c, Token): | ||||
return c | return c | ||||
class PropagatePositions_IgnoreWs(PropagatePositions): | |||||
def _pp_get_meta(self, children): | |||||
for c in children: | |||||
if isinstance(c, Tree): | |||||
if not c.meta.empty: | |||||
return c.meta | |||||
elif isinstance(c, Token): | |||||
if c and not c.isspace(): # Disregard whitespace-only tokens | |||||
return c | |||||
def make_propagate_positions(option): | def make_propagate_positions(option): | ||||
if option == "ignore_ws": | |||||
return PropagatePositions_IgnoreWs | |||||
if callable(option): | |||||
return partial(PropagatePositions, node_filter=option) | |||||
elif option is True: | elif option is True: | ||||
return PropagatePositions | return PropagatePositions | ||||
elif option is False: | elif option is False: | ||||
@@ -92,26 +92,26 @@ class ParsingFrontend(Serialize): | |||||
def _verify_start(self, start=None): | def _verify_start(self, start=None): | ||||
if start is None: | if start is None: | ||||
start = self.parser_conf.start | |||||
if len(start) > 1: | |||||
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | |||||
start ,= start | |||||
start_decls = self.parser_conf.start | |||||
if len(start_decls) > 1: | |||||
raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) | |||||
start ,= start_decls | |||||
elif start not in self.parser_conf.start: | elif start not in self.parser_conf.start: | ||||
raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) | raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) | ||||
return start | return start | ||||
def parse(self, text, start=None, on_error=None): | def parse(self, text, start=None, on_error=None): | ||||
start = self._verify_start(start) | |||||
chosen_start = self._verify_start(start) | |||||
stream = text if self.skip_lexer else LexerThread(self.lexer, text) | stream = text if self.skip_lexer else LexerThread(self.lexer, text) | ||||
kw = {} if on_error is None else {'on_error': on_error} | kw = {} if on_error is None else {'on_error': on_error} | ||||
return self.parser.parse(stream, start, **kw) | |||||
return self.parser.parse(stream, chosen_start, **kw) | |||||
def parse_interactive(self, text=None, start=None): | def parse_interactive(self, text=None, start=None): | ||||
start = self._verify_start(start) | |||||
chosen_start = self._verify_start(start) | |||||
if self.parser_conf.parser_type != 'lalr': | if self.parser_conf.parser_type != 'lalr': | ||||
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") | raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") | ||||
stream = text if self.skip_lexer else LexerThread(self.lexer, text) | stream = text if self.skip_lexer else LexerThread(self.lexer, text) | ||||
return self.parser.parse_interactive(stream, start) | |||||
return self.parser.parse_interactive(stream, chosen_start) | |||||
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||