@@ -33,7 +33,7 @@ class LarkOptions: | |||
regex: bool | |||
debug: bool | |||
keep_all_tokens: bool | |||
propagate_positions: bool | |||
propagate_positions: Union[bool, str] | |||
maybe_placeholders: bool | |||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
cache: Union[bool, str] | |||
@@ -77,7 +77,7 @@ class Lark: | |||
regex: bool = False, | |||
debug: bool = False, | |||
keep_all_tokens: bool = False, | |||
propagate_positions: bool = False, | |||
propagate_positions: Union[bool, str] = False, | |||
maybe_placeholders: bool = False, | |||
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | |||
cache: Union[bool, str] = False, | |||
@@ -76,7 +76,7 @@ class TerminalDef: | |||
class Token(str): | |||
type: str | |||
pos_in_stream: int | |||
start_pos: int | |||
value: Any | |||
line: int | |||
column: int | |||
@@ -84,7 +84,7 @@ class Token(str): | |||
end_column: int | |||
end_pos: int | |||
def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None: | |||
def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None: | |||
... | |||
def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token: | |||
@@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | |||
from .lexer import Token | |||
from .lark import Lark | |||
__version__ = "0.11.3" | |||
__version__ = "0.11.4" |
@@ -44,8 +44,9 @@ class LarkOptions(Serialize): | |||
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | |||
propagate_positions | |||
Propagates (line, column, end_line, end_column) attributes into all tree branches. | |||
Accepts ``False``, ``True``, or ``"ignore_ws"``, which will trim the whitespace around your trees (empty and whitespace-only tokens are skipped when picking a tree's start and end).
maybe_placeholders | |||
When True, the ``[]`` operator returns ``None`` when not matched. | |||
When ``True``, the ``[]`` operator returns ``None`` when not matched. | |||
When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. | |||
(default= ``False``. Recommended to set to ``True``) | |||
@@ -145,7 +146,7 @@ class LarkOptions(Serialize): | |||
for name, default in self._defaults.items(): | |||
if name in o: | |||
value = o.pop(name) | |||
if isinstance(default, bool) and name not in ('cache', 'use_bytes'): | |||
if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): | |||
value = bool(value) | |||
else: | |||
value = default | |||
@@ -573,7 +574,7 @@ class Lark(Serialize): | |||
@property
def source(self):
    """Deprecated alias for ``source_path``.

    Emits a single ``DeprecationWarning`` and returns ``self.source_path``.
    """
    # stacklevel=2 attributes the warning to the code that read the
    # attribute instead of to this property.
    warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning, stacklevel=2)
    return self.source_path
@source.setter | |||
@@ -582,7 +583,7 @@ class Lark(Serialize): | |||
@property
def grammar_source(self):
    """Deprecated alias for ``source_grammar``.

    Emits a single ``DeprecationWarning`` and returns ``self.source_grammar``.
    """
    # stacklevel=2 attributes the warning to the code that read the
    # attribute instead of to this property.
    warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning, stacklevel=2)
    return self.source_grammar
@grammar_source.setter | |||
@@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress | |||
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | |||
###{standalone | |||
from warnings import warn | |||
from copy import copy | |||
@@ -128,9 +129,9 @@ class Token(Str): | |||
end_column will be 5. | |||
end_pos: the index where the token ends (basically ``start_pos + len(token)``)
""" | |||
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | |||
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | |||
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): | |||
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | |||
try: | |||
self = super(Token, cls).__new__(cls, value) | |||
except UnicodeDecodeError: | |||
@@ -138,7 +139,7 @@ class Token(Str): | |||
self = super(Token, cls).__new__(cls, value) | |||
self.type = type_ | |||
self.pos_in_stream = pos_in_stream | |||
self.start_pos = start_pos if start_pos is not None else pos_in_stream | |||
self.value = value | |||
self.line = line | |||
self.column = column | |||
@@ -147,6 +148,11 @@ class Token(Str): | |||
self.end_pos = end_pos | |||
return self | |||
@property
def pos_in_stream(self):
    """Deprecated alias for ``start_pos``.

    Emits a ``DeprecationWarning`` and returns ``self.start_pos``.
    """
    # stacklevel=2 attributes the warning to the code that touched the
    # attribute instead of to this property.
    warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, stacklevel=2)
    return self.start_pos

@pos_in_stream.setter
def pos_in_stream(self, value):
    """Deprecated: store ``value`` in ``start_pos``.

    ``pos_in_stream`` used to be a plain writable slot; this setter keeps
    assignments from older code working while warning about the rename.
    """
    warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, stacklevel=2)
    self.start_pos = value
def update(self, type_=None, value=None): | |||
return Token.new_borrow_pos( | |||
type_ if type_ is not None else self.type, | |||
@@ -156,16 +162,16 @@ class Token(Str): | |||
@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
    """Build a ``cls`` instance from ``type_`` and ``value``, taking position from ``borrow_t``.

    The new instance receives ``borrow_t``'s ``start_pos``, ``line``,
    ``column``, ``end_line``, ``end_column`` and ``end_pos``.
    """
    # Read ``start_pos`` directly: the old ``pos_in_stream`` name is a
    # deprecated alias that warns on every access.
    return cls(type_, value,
               borrow_t.start_pos, borrow_t.line, borrow_t.column,
               borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
def __reduce__(self): | |||
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column)) | |||
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column)) | |||
def __repr__(self): | |||
return 'Token(%r, %r)' % (self.type, self.value) | |||
def __deepcopy__(self, memo):
    """Return a new ``Token`` with the same type, value and position fields.

    ``memo`` is accepted for the ``copy.deepcopy`` protocol but is not used.
    The end position fields are copied too, so a deep copy does not drop them.
    """
    return Token(self.type, self.value, self.start_pos, self.line, self.column,
                 self.end_line, self.end_column, self.end_pos)
def __eq__(self, other): | |||
if isinstance(other, Token) and self.type != other.type: | |||
@@ -1,4 +1,4 @@ | |||
from .exceptions import GrammarError | |||
from .exceptions import GrammarError, ConfigurationError | |||
from .lexer import Token | |||
from .tree import Tree | |||
from .visitors import InlineTransformer # XXX Deprecated | |||
@@ -21,6 +21,7 @@ class ExpandSingleChild: | |||
return self.node_builder(children) | |||
class PropagatePositions: | |||
def __init__(self, node_builder): | |||
self.node_builder = node_builder | |||
@@ -31,40 +32,52 @@ class PropagatePositions: | |||
# local reference to Tree.meta reduces number of presence checks | |||
if isinstance(res, Tree): | |||
res_meta = res.meta | |||
for c in children: | |||
if isinstance(c, Tree): | |||
child_meta = c.meta | |||
if not child_meta.empty: | |||
res_meta.line = child_meta.line | |||
res_meta.column = child_meta.column | |||
res_meta.start_pos = child_meta.start_pos | |||
res_meta.empty = False | |||
break | |||
elif isinstance(c, Token): | |||
res_meta.line = c.line | |||
res_meta.column = c.column | |||
res_meta.start_pos = c.pos_in_stream | |||
res_meta.empty = False | |||
break | |||
for c in reversed(children): | |||
if isinstance(c, Tree): | |||
child_meta = c.meta | |||
if not child_meta.empty: | |||
res_meta.end_line = child_meta.end_line | |||
res_meta.end_column = child_meta.end_column | |||
res_meta.end_pos = child_meta.end_pos | |||
res_meta.empty = False | |||
break | |||
elif isinstance(c, Token): | |||
res_meta.end_line = c.end_line | |||
res_meta.end_column = c.end_column | |||
res_meta.end_pos = c.end_pos | |||
res_meta.empty = False | |||
break | |||
src_meta = self._pp_get_meta(children) | |||
if src_meta is not None: | |||
res_meta.line = src_meta.line | |||
res_meta.column = src_meta.column | |||
res_meta.start_pos = src_meta.start_pos | |||
res_meta.empty = False | |||
src_meta = self._pp_get_meta(reversed(children)) | |||
if src_meta is not None: | |||
res_meta.end_line = src_meta.end_line | |||
res_meta.end_column = src_meta.end_column | |||
res_meta.end_pos = src_meta.end_pos | |||
res_meta.empty = False | |||
return res | |||
def _pp_get_meta(self, children):
    """Return the first child in ``children`` that can supply position data.

    A ``Tree`` qualifies when its ``meta`` is not empty, in which case the
    ``meta`` object is returned; a ``Token`` always qualifies and is returned
    itself. Returns ``None`` when no child qualifies.
    """
    for c in children:
        if isinstance(c, Tree):
            if not c.meta.empty:
                return c.meta
        elif isinstance(c, Token):
            return c
    return None
class PropagatePositions_IgnoreWs(PropagatePositions):
    """``PropagatePositions`` variant that skips empty and whitespace-only tokens."""

    def _pp_get_meta(self, children):
        """Return the first child in ``children`` that can supply position data.

        A ``Tree`` qualifies when its ``meta`` is not empty (its ``meta`` is
        returned). A ``Token`` qualifies only when it is non-empty and not
        made up solely of whitespace. Returns ``None`` when no child qualifies.
        """
        for c in children:
            if isinstance(c, Tree):
                if not c.meta.empty:
                    return c.meta
            elif isinstance(c, Token):
                if c and not c.isspace():  # Disregard whitespace-only tokens
                    return c
        return None
def make_propagate_positions(option):
    """Map the ``propagate_positions`` option to a wrapper class.

    Returns ``PropagatePositions_IgnoreWs`` for ``"ignore_ws"``,
    ``PropagatePositions`` for ``True`` and ``None`` for ``False``.

    Raises ``ConfigurationError`` for any other value. The boolean cases are
    matched by identity, so truthy or falsy stand-ins such as ``1`` or ``0``
    are rejected.
    """
    if option == "ignore_ws":
        return PropagatePositions_IgnoreWs
    elif option is True:
        return PropagatePositions
    elif option is False:
        return None

    raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
class ChildFilter: | |||
def __init__(self, to_include, append_none, node_builder): | |||
@@ -320,6 +333,8 @@ class ParseTreeBuilder: | |||
self.rule_builders = list(self._init_builders(rules)) | |||
def _init_builders(self, rules): | |||
propagate_positions = make_propagate_positions(self.propagate_positions) | |||
for rule in rules: | |||
options = rule.options | |||
keep_all_tokens = options.keep_all_tokens | |||
@@ -328,7 +343,7 @@ class ParseTreeBuilder: | |||
wrapper_chain = list(filter(None, [ | |||
(expand_single_child and not rule.alias) and ExpandSingleChild, | |||
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | |||
self.propagate_positions and PropagatePositions, | |||
propagate_positions, | |||
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | |||
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | |||
])) | |||