| @@ -33,7 +33,7 @@ class LarkOptions: | |||
| regex: bool | |||
| debug: bool | |||
| keep_all_tokens: bool | |||
| propagate_positions: Union[bool, str] | |||
| propagate_positions: Union[bool, Callable] | |||
| maybe_placeholders: bool | |||
| lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
| cache: Union[bool, str] | |||
| @@ -77,7 +77,7 @@ class Lark: | |||
| regex: bool = False, | |||
| debug: bool = False, | |||
| keep_all_tokens: bool = False, | |||
| propagate_positions: Union[bool, str] = False, | |||
| propagate_positions: Union[bool, Callable] = False, | |||
| maybe_placeholders: bool = False, | |||
| lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | |||
| cache: Union[bool, str] = False, | |||
| @@ -44,7 +44,7 @@ class LarkOptions(Serialize): | |||
| Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | |||
| propagate_positions | |||
| Propagates (line, column, end_line, end_column) attributes into all tree branches. | |||
| Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees. | |||
| Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. | |||
| maybe_placeholders | |||
| When ``True``, the ``[]`` operator returns ``None`` when not matched. | |||
| @@ -162,7 +162,7 @@ class LarkOptions(Serialize): | |||
| assert_config(self.parser, ('earley', 'lalr', 'cyk', None)) | |||
| if self.parser == 'earley' and self.transformer: | |||
| raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm.' | |||
| raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. ' | |||
| 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') | |||
| if o: | |||
| @@ -133,20 +133,20 @@ class Token(Str): | |||
| def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | |||
| try: | |||
| self = super(Token, cls).__new__(cls, value) | |||
| inst = super(Token, cls).__new__(cls, value) | |||
| except UnicodeDecodeError: | |||
| value = value.decode('latin1') | |||
| self = super(Token, cls).__new__(cls, value) | |||
| self.type = type_ | |||
| self.start_pos = start_pos if start_pos is not None else pos_in_stream | |||
| self.value = value | |||
| self.line = line | |||
| self.column = column | |||
| self.end_line = end_line | |||
| self.end_column = end_column | |||
| self.end_pos = end_pos | |||
| return self | |||
| inst = super(Token, cls).__new__(cls, value) | |||
| inst.type = type_ | |||
| inst.start_pos = start_pos if start_pos is not None else pos_in_stream | |||
| inst.value = value | |||
| inst.line = line | |||
| inst.column = column | |||
| inst.end_line = end_line | |||
| inst.end_column = end_column | |||
| inst.end_pos = end_pos | |||
| return inst | |||
| @property | |||
| def pos_in_stream(self): | |||
| @@ -258,8 +258,8 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||
| if unless: | |||
| callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) | |||
| terminals = [t for t in terminals if t not in embedded_strs] | |||
| return terminals, callback | |||
| new_terminals = [t for t in terminals if t not in embedded_strs] | |||
| return new_terminals, callback | |||
| @@ -23,8 +23,9 @@ class ExpandSingleChild: | |||
| class PropagatePositions: | |||
| def __init__(self, node_builder): | |||
| def __init__(self, node_builder, node_filter=None): | |||
| self.node_builder = node_builder | |||
| self.node_filter = node_filter | |||
| def __call__(self, children): | |||
| res = self.node_builder(children) | |||
| @@ -33,44 +34,35 @@ class PropagatePositions: | |||
| if isinstance(res, Tree): | |||
| res_meta = res.meta | |||
| src_meta = self._pp_get_meta(children) | |||
| if src_meta is not None: | |||
| res_meta.line = src_meta.line | |||
| res_meta.column = src_meta.column | |||
| res_meta.start_pos = src_meta.start_pos | |||
| first_meta = self._pp_get_meta(children) | |||
| if first_meta is not None: | |||
| res_meta.line = first_meta.line | |||
| res_meta.column = first_meta.column | |||
| res_meta.start_pos = first_meta.start_pos | |||
| res_meta.empty = False | |||
| src_meta = self._pp_get_meta(reversed(children)) | |||
| if src_meta is not None: | |||
| res_meta.end_line = src_meta.end_line | |||
| res_meta.end_column = src_meta.end_column | |||
| res_meta.end_pos = src_meta.end_pos | |||
| last_meta = self._pp_get_meta(reversed(children)) | |||
| if last_meta is not None: | |||
| res_meta.end_line = last_meta.end_line | |||
| res_meta.end_column = last_meta.end_column | |||
| res_meta.end_pos = last_meta.end_pos | |||
| res_meta.empty = False | |||
| return res | |||
| def _pp_get_meta(self, children): | |||
| for c in children: | |||
| if self.node_filter is not None and not self.node_filter(c): | |||
| continue | |||
| if isinstance(c, Tree): | |||
| if not c.meta.empty: | |||
| return c.meta | |||
| elif isinstance(c, Token): | |||
| return c | |||
| class PropagatePositions_IgnoreWs(PropagatePositions): | |||
| def _pp_get_meta(self, children): | |||
| for c in children: | |||
| if isinstance(c, Tree): | |||
| if not c.meta.empty: | |||
| return c.meta | |||
| elif isinstance(c, Token): | |||
| if c and not c.isspace(): # Disregard whitespace-only tokens | |||
| return c | |||
| def make_propagate_positions(option): | |||
| if option == "ignore_ws": | |||
| return PropagatePositions_IgnoreWs | |||
| if callable(option): | |||
| return partial(PropagatePositions, node_filter=option) | |||
| elif option is True: | |||
| return PropagatePositions | |||
| elif option is False: | |||
| @@ -92,26 +92,26 @@ class ParsingFrontend(Serialize): | |||
| def _verify_start(self, start=None): | |||
| if start is None: | |||
| start = self.parser_conf.start | |||
| if len(start) > 1: | |||
| raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) | |||
| start ,= start | |||
| start_decls = self.parser_conf.start | |||
| if len(start_decls) > 1: | |||
| raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls) | |||
| start ,= start_decls | |||
| elif start not in self.parser_conf.start: | |||
| raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start)) | |||
| return start | |||
| def parse(self, text, start=None, on_error=None): | |||
| start = self._verify_start(start) | |||
| chosen_start = self._verify_start(start) | |||
| stream = text if self.skip_lexer else LexerThread(self.lexer, text) | |||
| kw = {} if on_error is None else {'on_error': on_error} | |||
| return self.parser.parse(stream, start, **kw) | |||
| return self.parser.parse(stream, chosen_start, **kw) | |||
| def parse_interactive(self, text=None, start=None): | |||
| start = self._verify_start(start) | |||
| chosen_start = self._verify_start(start) | |||
| if self.parser_conf.parser_type != 'lalr': | |||
| raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") | |||
| stream = text if self.skip_lexer else LexerThread(self.lexer, text) | |||
| return self.parser.parse_interactive(stream, start) | |||
| return self.parser.parse_interactive(stream, chosen_start) | |||
| def get_frontend(parser, lexer): | |||