Kaynağa Gözat

Merge pull request #907 from lark-parser/propagate_positions_fix2

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Shinan 3 yıl önce
committed by GitHub
ebeveyn
işleme
e4904b32da
Veri tabanında bu imza için bilinen anahtar bulunamadı GPG Anahtar Kimliği: 4AEE18F83AFDEB23
6 değiştirilmiş dosya ile 70 ekleme ve 48 silme
  1. +2 −2  lark-stubs/lark.pyi
  2. +2 −2  lark-stubs/lexer.pyi
  3. +1 −1  lark/__init__.py
  4. +5 −4  lark/lark.py
  5. +12 −6  lark/lexer.py
  6. +48 −33  lark/parse_tree_builder.py

+2 −2  lark-stubs/lark.pyi — Dosyayı Görüntüle

@@ -33,7 +33,7 @@ class LarkOptions:
regex: bool
debug: bool
keep_all_tokens: bool
propagate_positions: bool
propagate_positions: Union[bool, str]
maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str]
@@ -77,7 +77,7 @@ class Lark:
regex: bool = False,
debug: bool = False,
keep_all_tokens: bool = False,
propagate_positions: bool = False,
propagate_positions: Union[bool, str] = False,
maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False,


+2 −2  lark-stubs/lexer.pyi — Dosyayı Görüntüle

@@ -76,7 +76,7 @@ class TerminalDef:

class Token(str):
type: str
pos_in_stream: int
start_pos: int
value: Any
line: int
column: int
@@ -84,7 +84,7 @@ class Token(str):
end_column: int
end_pos: int

def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
...

def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:


+1 −1  lark/__init__.py — Dosyayı Görüntüle

@@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
from .lexer import Token
from .lark import Lark

__version__ = "0.11.3"
__version__ = "0.11.4"

+5 −4  lark/lark.py — Dosyayı Görüntüle

@@ -44,8 +44,9 @@ class LarkOptions(Serialize):
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
Propagates (line, column, end_line, end_column) attributes into all tree branches.
Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees.
maybe_placeholders
When True, the ``[]`` operator returns ``None`` when not matched.
When ``True``, the ``[]`` operator returns ``None`` when not matched.

When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
(default= ``False``. Recommended to set to ``True``)
@@ -145,7 +146,7 @@ class LarkOptions(Serialize):
for name, default in self._defaults.items():
if name in o:
value = o.pop(name)
if isinstance(default, bool) and name not in ('cache', 'use_bytes'):
if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
value = bool(value)
else:
value = default
@@ -573,7 +574,7 @@ class Lark(Serialize):

@property
def source(self):
warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning)
return self.source_path

@source.setter
@@ -582,7 +583,7 @@ class Lark(Serialize):

@property
def grammar_source(self):
warn("Lark.grammar_source attribute has been renamed to Lark.source_grammar", DeprecationWarning)
warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning)
return self.source_grammar

@grammar_source.setter


+12 −6  lark/lexer.py — Dosyayı Görüntüle

@@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken

###{standalone
from warnings import warn
from copy import copy


@@ -128,9 +129,9 @@ class Token(Str):
end_column will be 5.
end_pos: the index where the token ends (basically ``pos_in_stream + len(token)``)
"""
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')

def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
try:
self = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError:
@@ -138,7 +139,7 @@ class Token(Str):
self = super(Token, cls).__new__(cls, value)

self.type = type_
self.pos_in_stream = pos_in_stream
self.start_pos = start_pos if start_pos is not None else pos_in_stream
self.value = value
self.line = line
self.column = column
@@ -147,6 +148,11 @@ class Token(Str):
self.end_pos = end_pos
return self

@property
def pos_in_stream(self):
warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning)
return self.start_pos

def update(self, type_=None, value=None):
return Token.new_borrow_pos(
type_ if type_ is not None else self.type,
@@ -156,16 +162,16 @@ class Token(Str):

@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))

def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value)

def __deepcopy__(self, memo):
return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
return Token(self.type, self.value, self.start_pos, self.line, self.column)

def __eq__(self, other):
if isinstance(other, Token) and self.type != other.type:


+48 −33  lark/parse_tree_builder.py — Dosyayı Görüntüle

@@ -1,4 +1,4 @@
from .exceptions import GrammarError
from .exceptions import GrammarError, ConfigurationError
from .lexer import Token
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated
@@ -21,6 +21,7 @@ class ExpandSingleChild:
return self.node_builder(children)



class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder
@@ -31,40 +32,52 @@ class PropagatePositions:
# local reference to Tree.meta reduces number of presence checks
if isinstance(res, Tree):
res_meta = res.meta
for c in children:
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.line = child_meta.line
res_meta.column = child_meta.column
res_meta.start_pos = child_meta.start_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.line = c.line
res_meta.column = c.column
res_meta.start_pos = c.pos_in_stream
res_meta.empty = False
break

for c in reversed(children):
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.end_line = child_meta.end_line
res_meta.end_column = child_meta.end_column
res_meta.end_pos = child_meta.end_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.end_line = c.end_line
res_meta.end_column = c.end_column
res_meta.end_pos = c.end_pos
res_meta.empty = False
break

src_meta = self._pp_get_meta(children)
if src_meta is not None:
res_meta.line = src_meta.line
res_meta.column = src_meta.column
res_meta.start_pos = src_meta.start_pos
res_meta.empty = False

src_meta = self._pp_get_meta(reversed(children))
if src_meta is not None:
res_meta.end_line = src_meta.end_line
res_meta.end_column = src_meta.end_column
res_meta.end_pos = src_meta.end_pos
res_meta.empty = False

return res

def _pp_get_meta(self, children):
for c in children:
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
return c

class PropagatePositions_IgnoreWs(PropagatePositions):
def _pp_get_meta(self, children):
for c in children:
if isinstance(c, Tree):
if not c.meta.empty:
return c.meta
elif isinstance(c, Token):
if c and not c.isspace(): # Disregard whitespace-only tokens
return c


def make_propagate_positions(option):
if option == "ignore_ws":
return PropagatePositions_IgnoreWs
elif option is True:
return PropagatePositions
elif option is False:
return None

raise ConfigurationError('Invalid option for propagate_positions: %r' % option)


class ChildFilter:
def __init__(self, to_include, append_none, node_builder):
@@ -320,6 +333,8 @@ class ParseTreeBuilder:
self.rule_builders = list(self._init_builders(rules))

def _init_builders(self, rules):
propagate_positions = make_propagate_positions(self.propagate_positions)

for rule in rules:
options = rule.options
keep_all_tokens = options.keep_all_tokens
@@ -328,7 +343,7 @@ class ParseTreeBuilder:
wrapper_chain = list(filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
self.propagate_positions and PropagatePositions,
propagate_positions,
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
]))


Yükleniyor…
İptal
Kaydet