Browse Source

Added Token.end_pos and updated the docs to reflect recent commits

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
parent
commit
58d6d9fac1
4 changed files with 13 additions and 11 deletions
  1. +6
    -6
      docs/recipes.md
  2. +1
    -1
      examples/custom_lexer.py
  3. +5
    -3
      lark/lexer.py
  4. +1
    -1
      lark/parse_tree_builder.py

+ 6
- 6
docs/recipes.md View File

@@ -19,18 +19,18 @@ It only works with the standard and contextual lexers.
### Example 1: Replace string values with ints for INT tokens

```python
from lark import Lark, Token
from lark import Lark, Transformer

def tok_to_int(tok):
"Convert the value of `tok` from string to int, while maintaining line number & column."
# tok.type == 'INT'
return Token.new_borrow_pos(tok.type, int(tok), tok)
class T(Transformer):
def INT(self, tok):
"Convert the value of `tok` from string to int, while maintaining line number & column."
return tok.update(value=int(tok))

parser = Lark("""
start: INT*
%import common.INT
%ignore " "
""", parser="lalr", lexer_callbacks = {'INT': tok_to_int})
""", parser="lalr", transformer=T())

print(parser.parse('3 14 159'))
```


+ 1
- 1
examples/custom_lexer.py View File

@@ -29,7 +29,7 @@ parser = Lark("""
data_item: STR INT*

%declare STR INT
""", parser='lalr', lexer=TypeLexer, propagate_positions=False)
""", parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):


+ 5
- 3
lark/lexer.py View File

@@ -90,9 +90,9 @@ class TerminalDef(Serialize):


class Token(Str):
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')
__slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')

def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None):
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
try:
self = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError:
@@ -106,6 +106,7 @@ class Token(Str):
self.column = column
self.end_line = end_line
self.end_column = end_column
self.end_pos = end_pos
return self

def update(self, type_=None, value=None):
@@ -117,7 +118,7 @@ class Token(Str):

@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column)
return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

def __reduce__(self):
return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
@@ -187,6 +188,7 @@ class _Lex:
line_ctr.feed(value, type_ in newline_types)
t.end_line = line_ctr.line
t.end_column = line_ctr.column
t.end_pos = line_ctr.char_pos
if t.type in lexer.callback:
t = lexer.callback[t.type](t)
if not isinstance(t, Token):


+ 1
- 1
lark/parse_tree_builder.py View File

@@ -51,7 +51,7 @@ class PropagatePositions:
elif isinstance(c, Token):
res.meta.end_line = c.end_line
res.meta.end_column = c.end_column
res.meta.end_pos = c.pos_in_stream + len(c.value)
res.meta.end_pos = c.end_pos
res.meta.empty = False
break



Loading…
Cancel
Save