Browse Source

Merge pull request #907 from lark-parser/propagate_positions_fix2

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Shinan 4 years ago
committed by GitHub
parent
commit
e4904b32da
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 70 additions and 48 deletions
  1. +2
    -2
      lark-stubs/lark.pyi
  2. +2
    -2
      lark-stubs/lexer.pyi
  3. +1
    -1
      lark/__init__.py
  4. +5
    -4
      lark/lark.py
  5. +12
    -6
      lark/lexer.py
  6. +48
    -33
      lark/parse_tree_builder.py

+ 2
- 2
lark-stubs/lark.pyi View File

@@ -33,7 +33,7 @@ class LarkOptions:
regex: bool regex: bool
debug: bool debug: bool
keep_all_tokens: bool keep_all_tokens: bool
propagate_positions: bool
propagate_positions: Union[bool, str]
maybe_placeholders: bool maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]] lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str] cache: Union[bool, str]
@@ -77,7 +77,7 @@ class Lark:
regex: bool = False, regex: bool = False,
debug: bool = False, debug: bool = False,
keep_all_tokens: bool = False, keep_all_tokens: bool = False,
propagate_positions: bool = False,
propagate_positions: Union[bool, str] = False,
maybe_placeholders: bool = False, maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False, cache: Union[bool, str] = False,


+ 2
- 2
lark-stubs/lexer.pyi View File

@@ -76,7 +76,7 @@ class TerminalDef:


class Token(str): class Token(str):
type: str type: str
pos_in_stream: int
start_pos: int
value: Any value: Any
line: int line: int
column: int column: int
@@ -84,7 +84,7 @@ class Token(str):
end_column: int end_column: int
end_pos: int end_pos: int


def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
... ...


def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token: def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:


+ 1
- 1
lark/__init__.py View File

@@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
from .lexer import Token from .lexer import Token
from .lark import Lark from .lark import Lark


__version__ = "0.11.3"
__version__ = "0.11.4"

+ 5
- 4
lark/lark.py View File

@@ -44,8 +44,9 @@ class LarkOptions(Serialize):
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches. Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees.
maybe_placeholders maybe_placeholders
When True, the ``[]`` operator returns ``None`` when not matched.
When ``True``, the ``[]`` operator returns ``None`` when not matched.


When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
(default= ``False``. Recommended to set to ``True``) (default= ``False``. Recommended to set to ``True``)
@@ -145,7 +146,7 @@ class LarkOptions(Serialize):
for name, default in self._defaults.items(): for name, default in self._defaults.items():
if name in o: if name in o:
value = o.pop(name) value = o.pop(name)
if isinstance(default, bool) and name not in ('cache', 'use_bytes'):
if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
value = bool(value) value = bool(value)
else: else:
value = default value = default
@@ -573,7 +574,7 @@ class Lark(Serialize):


@property @property
def source(self): def source(self):
warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning)
return self.source_path return self.source_path


@source.setter @source.setter
@@ -582,7 +583,7 @@ class Lark(Serialize):


@property @property
def grammar_source(self): def grammar_source(self):
warn("Lark.grammar_source attribute has been renamed to Lark.source_grammar", DeprecationWarning)
warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning)
return self.source_grammar return self.source_grammar


@grammar_source.setter @grammar_source.setter


+ 12
- 6
lark/lexer.py View File

@@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken


###{standalone ###{standalone
from warnings import warn
from copy import copy from copy import copy




@@ -128,9 +129,9 @@ class Token(Str):
end_column will be 5. end_column will be 5.
end_pos: the index where the token ends (basically ``pos_in_stream + len(token)``) end_pos: the index where the token ends (basically ``pos_in_stream + len(token)``)
""" """
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')


def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
try: try:
self = super(Token, cls).__new__(cls, value) self = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError: except UnicodeDecodeError:
@@ -138,7 +139,7 @@ class Token(Str):
self = super(Token, cls).__new__(cls, value) self = super(Token, cls).__new__(cls, value)


self.type = type_ self.type = type_
self.pos_in_stream = pos_in_stream
self.start_pos = start_pos if start_pos is not None else pos_in_stream
self.value = value self.value = value
self.line = line self.line = line
self.column = column self.column = column
@@ -147,6 +148,11 @@ class Token(Str):
self.end_pos = end_pos self.end_pos = end_pos
return self return self


@property
def pos_in_stream(self):
    """Deprecated read-only alias for ``start_pos``.

    Kept for backward compatibility after the attribute rename; emits a
    DeprecationWarning on every access and returns ``self.start_pos``.
    """
    warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning)
    return self.start_pos

def update(self, type_=None, value=None): def update(self, type_=None, value=None):
return Token.new_borrow_pos( return Token.new_borrow_pos(
type_ if type_ is not None else self.type, type_ if type_ is not None else self.type,
@@ -156,16 +162,16 @@ class Token(Str):


@classmethod @classmethod
def new_borrow_pos(cls, type_, value, borrow_t): def new_borrow_pos(cls, type_, value, borrow_t):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)


def __reduce__(self): def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))


def __repr__(self): def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value) return 'Token(%r, %r)' % (self.type, self.value)


def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
return Token(self.type, self.value, self.start_pos, self.line, self.column)


def __eq__(self, other): def __eq__(self, other):
if isinstance(other, Token) and self.type != other.type: if isinstance(other, Token) and self.type != other.type:


+ 48
- 33
lark/parse_tree_builder.py View File

@@ -1,4 +1,4 @@
from .exceptions import GrammarError
from .exceptions import GrammarError, ConfigurationError
from .lexer import Token from .lexer import Token
from .tree import Tree from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated from .visitors import InlineTransformer # XXX Deprecated
@@ -21,6 +21,7 @@ class ExpandSingleChild:
return self.node_builder(children) return self.node_builder(children)





class PropagatePositions: class PropagatePositions:
def __init__(self, node_builder): def __init__(self, node_builder):
self.node_builder = node_builder self.node_builder = node_builder
@@ -31,40 +32,52 @@ class PropagatePositions:
# local reference to Tree.meta reduces number of presence checks # local reference to Tree.meta reduces number of presence checks
if isinstance(res, Tree): if isinstance(res, Tree):
res_meta = res.meta res_meta = res.meta
for c in children:
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.line = child_meta.line
res_meta.column = child_meta.column
res_meta.start_pos = child_meta.start_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.line = c.line
res_meta.column = c.column
res_meta.start_pos = c.pos_in_stream
res_meta.empty = False
break

for c in reversed(children):
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.end_line = child_meta.end_line
res_meta.end_column = child_meta.end_column
res_meta.end_pos = child_meta.end_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.end_line = c.end_line
res_meta.end_column = c.end_column
res_meta.end_pos = c.end_pos
res_meta.empty = False
break

src_meta = self._pp_get_meta(children)
if src_meta is not None:
res_meta.line = src_meta.line
res_meta.column = src_meta.column
res_meta.start_pos = src_meta.start_pos
res_meta.empty = False

src_meta = self._pp_get_meta(reversed(children))
if src_meta is not None:
res_meta.end_line = src_meta.end_line
res_meta.end_column = src_meta.end_column
res_meta.end_pos = src_meta.end_pos
res_meta.empty = False


return res return res


def _pp_get_meta(self, children):
    """Return the first usable position source found in *children*.

    Scans the children in the given order and returns the first one that
    carries position information: a Tree whose meta is populated
    (``not c.meta.empty``) yields that meta object, and a Token yields the
    token itself. Implicitly returns None when no child qualifies, which
    the caller treats as "nothing to propagate".
    """
    for c in children:
        if isinstance(c, Tree):
            if not c.meta.empty:
                return c.meta
        elif isinstance(c, Token):
            return c

class PropagatePositions_IgnoreWs(PropagatePositions):
    """Position propagation that ignores whitespace-only tokens.

    Behaves like PropagatePositions, except that tokens which are empty or
    consist solely of whitespace are never used as a position source, so
    surrounding whitespace does not widen a tree's reported span.
    """

    def _pp_get_meta(self, children):
        """Return the first non-whitespace position source, or None."""
        for child in children:
            if isinstance(child, Tree) and not child.meta.empty:
                return child.meta
            if isinstance(child, Token) and child and not child.isspace():
                return child


def make_propagate_positions(option):
    """Translate the ``propagate_positions`` option into a wrapper class.

    Returns None for ``False`` (no propagation), PropagatePositions for
    ``True``, and PropagatePositions_IgnoreWs for the string "ignore_ws".
    Any other value raises ConfigurationError.

    Note: identity checks (``is True`` / ``is False``) are deliberate so
    that truthy stand-ins like 1 or 0 are rejected rather than accepted.
    """
    if option is False:
        return None
    if option is True:
        return PropagatePositions
    if option == "ignore_ws":
        return PropagatePositions_IgnoreWs

    raise ConfigurationError('Invalid option for propagate_positions: %r' % option)



class ChildFilter: class ChildFilter:
def __init__(self, to_include, append_none, node_builder): def __init__(self, to_include, append_none, node_builder):
@@ -320,6 +333,8 @@ class ParseTreeBuilder:
self.rule_builders = list(self._init_builders(rules)) self.rule_builders = list(self._init_builders(rules))


def _init_builders(self, rules): def _init_builders(self, rules):
propagate_positions = make_propagate_positions(self.propagate_positions)

for rule in rules: for rule in rules:
options = rule.options options = rule.options
keep_all_tokens = options.keep_all_tokens keep_all_tokens = options.keep_all_tokens
@@ -328,7 +343,7 @@ class ParseTreeBuilder:
wrapper_chain = list(filter(None, [ wrapper_chain = list(filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild, (expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
self.propagate_positions and PropagatePositions,
propagate_positions,
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
])) ]))


Loading…
Cancel
Save