Browse Source

Refactor PropagatePositions, change Token.pos_in_stream to start_pos

Increased minor version, because it might invalidate serialized objects
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
Erez Sh 3 years ago
parent
commit
2409065529
4 changed files with 38 additions and 40 deletions
  1. +1
    -1
      lark/__init__.py
  2. +2
    -2
      lark/lark.py
  3. +12
    -6
      lark/lexer.py
  4. +23
    -31
      lark/parse_tree_builder.py

+ 1
- 1
lark/__init__.py View File

@@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
from .lexer import Token from .lexer import Token
from .lark import Lark from .lark import Lark


__version__ = "0.11.3"
__version__ = "0.11.4"

+ 2
- 2
lark/lark.py View File

@@ -573,7 +573,7 @@ class Lark(Serialize):


@property @property
def source(self): def source(self):
warn("Lark.source attribute has been renamed to Lark.source_path", DeprecationWarning)
warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning)
return self.source_path return self.source_path


@source.setter @source.setter
@@ -582,7 +582,7 @@ class Lark(Serialize):


@property @property
def grammar_source(self): def grammar_source(self):
warn("Lark.grammar_source attribute has been renamed to Lark.source_grammar", DeprecationWarning)
warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning)
return self.source_grammar return self.source_grammar


@grammar_source.setter @grammar_source.setter


+ 12
- 6
lark/lexer.py View File

@@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken


###{standalone ###{standalone
from warnings import warn
from copy import copy from copy import copy




@@ -128,9 +129,9 @@ class Token(Str):
end_column will be 5. end_column will be 5.
end_pos: the index where the token ends (basically ``start_pos + len(token)``) end_pos: the index where the token ends (basically ``start_pos + len(token)``)
""" """
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')


def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
try: try:
self = super(Token, cls).__new__(cls, value) self = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError: except UnicodeDecodeError:
@@ -138,7 +139,7 @@ class Token(Str):
self = super(Token, cls).__new__(cls, value) self = super(Token, cls).__new__(cls, value)


self.type = type_ self.type = type_
self.pos_in_stream = pos_in_stream
self.start_pos = start_pos if start_pos is not None else pos_in_stream
self.value = value self.value = value
self.line = line self.line = line
self.column = column self.column = column
@@ -147,6 +148,11 @@ class Token(Str):
self.end_pos = end_pos self.end_pos = end_pos
return self return self


@property
def pos_in_stream(self):
    """Deprecated read-only alias for ``start_pos``.

    Emits a ``DeprecationWarning`` and returns ``self.start_pos``.
    """
    # stacklevel=2 attributes the warning to the code that read the
    # attribute, not to this property, so users can locate the call site.
    warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, stacklevel=2)
    return self.start_pos

def update(self, type_=None, value=None): def update(self, type_=None, value=None):
return Token.new_borrow_pos( return Token.new_borrow_pos(
type_ if type_ is not None else self.type, type_ if type_ is not None else self.type,
@@ -156,16 +162,16 @@ class Token(Str):


@classmethod @classmethod
def new_borrow_pos(cls, type_, value, borrow_t): def new_borrow_pos(cls, type_, value, borrow_t):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)


def __reduce__(self): def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column))
return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))


def __repr__(self): def __repr__(self):
return 'Token(%r, %r)' % (self.type, self.value) return 'Token(%r, %r)' % (self.type, self.value)


def __deepcopy__(self, memo): def __deepcopy__(self, memo):
return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
return Token(self.type, self.value, self.start_pos, self.line, self.column)


def __eq__(self, other): def __eq__(self, other):
if isinstance(other, Token) and self.type != other.type: if isinstance(other, Token) and self.type != other.type:


+ 23
- 31
lark/parse_tree_builder.py View File

@@ -21,6 +21,15 @@ class ExpandSingleChild:
return self.node_builder(children) return self.node_builder(children)





def _pp_get_meta(children):
    """Return the first position-carrying object found in ``children``.

    Iterates ``children`` in the order given. A ``Tree`` child yields its
    ``meta`` unless ``meta.empty`` is true, in which case it is skipped.
    A ``Token`` child is returned as-is. Any other child is ignored.
    Returns ``None`` when no child qualifies.
    """
    for child in children:
        if isinstance(child, Tree):
            if child.meta.empty:
                # This subtree has no position info; keep looking.
                continue
            return child.meta
        if isinstance(child, Token):
            return child
    return None

class PropagatePositions: class PropagatePositions:
def __init__(self, node_builder): def __init__(self, node_builder):
self.node_builder = node_builder self.node_builder = node_builder
@@ -31,37 +40,20 @@ class PropagatePositions:
# local reference to Tree.meta reduces number of presence checks # local reference to Tree.meta reduces number of presence checks
if isinstance(res, Tree): if isinstance(res, Tree):
res_meta = res.meta res_meta = res.meta
for c in children:
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.line = child_meta.line
res_meta.column = child_meta.column
res_meta.start_pos = child_meta.start_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.line = c.line
res_meta.column = c.column
res_meta.start_pos = c.pos_in_stream
res_meta.empty = False
break

for c in reversed(children):
if isinstance(c, Tree):
child_meta = c.meta
if not child_meta.empty:
res_meta.end_line = child_meta.end_line
res_meta.end_column = child_meta.end_column
res_meta.end_pos = child_meta.end_pos
res_meta.empty = False
break
elif isinstance(c, Token):
res_meta.end_line = c.end_line
res_meta.end_column = c.end_column
res_meta.end_pos = c.end_pos
res_meta.empty = False
break

src_meta = _pp_get_meta(children)
if src_meta:
res_meta.line = src_meta.line
res_meta.column = src_meta.column
res_meta.start_pos = src_meta.start_pos
res_meta.empty = False

src_meta = _pp_get_meta(reversed(children))
if src_meta:
res_meta.end_line = src_meta.end_line
res_meta.end_column = src_meta.end_column
res_meta.end_pos = src_meta.end_pos
res_meta.empty = False


return res return res




Loading…
Cancel
Save