From 58d6d9fac1883476ea890634124fbfbabc952650 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 17 Nov 2019 16:10:54 +0200 Subject: [PATCH] Added Token.end_pos, and updated docs regarding recent commits --- docs/recipes.md | 12 ++++++------ examples/custom_lexer.py | 2 +- lark/lexer.py | 8 +++++--- lark/parse_tree_builder.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/recipes.md b/docs/recipes.md index 2202ab7..4e734e7 100644 --- a/docs/recipes.md +++ b/docs/recipes.md @@ -19,18 +19,18 @@ It only works with the standard and contextual lexers. ### Example 1: Replace string values with ints for INT tokens ```python -from lark import Lark, Token +from lark import Lark, Transformer -def tok_to_int(tok): - "Convert the value of `tok` from string to int, while maintaining line number & column." - # tok.type == 'INT' - return Token.new_borrow_pos(tok.type, int(tok), tok) +class T(Transformer): + def INT(self, tok): + "Convert the value of `tok` from string to int, while maintaining line number & column." + return tok.update(value=int(tok)) parser = Lark(""" start: INT* %import common.INT %ignore " " -""", parser="lalr", lexer_callbacks = {'INT': tok_to_int}) +""", parser="lalr", transformer=T()) print(parser.parse('3 14 159')) ``` diff --git a/examples/custom_lexer.py b/examples/custom_lexer.py index 732e614..786bf4f 100644 --- a/examples/custom_lexer.py +++ b/examples/custom_lexer.py @@ -29,7 +29,7 @@ parser = Lark(""" data_item: STR INT* %declare STR INT - """, parser='lalr', lexer=TypeLexer, propagate_positions=False) + """, parser='lalr', lexer=TypeLexer) class ParseToDict(Transformer): diff --git a/lark/lexer.py b/lark/lexer.py index 21951e4..871b25e 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -90,9 +90,9 @@ class TerminalDef(Serialize): class Token(Str): - __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') + __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') - def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None): + def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): try: self = super(Token, cls).__new__(cls, value) except UnicodeDecodeError: @@ -106,6 +106,7 @@ class Token(Str): self.column = column self.end_line = end_line self.end_column = end_column + self.end_pos = end_pos return self def update(self, type_=None, value=None): @@ -117,7 +118,7 @@ class Token(Str): @classmethod def new_borrow_pos(cls, type_, value, borrow_t): - return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column) + return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) @@ -187,6 +188,7 @@ class _Lex: line_ctr.feed(value, type_ in newline_types) t.end_line = line_ctr.line t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos if t.type in lexer.callback: t = lexer.callback[t.type](t) if not isinstance(t, Token): diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index b54b6e8..3c47ef0 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -51,7 +51,7 @@ class PropagatePositions: elif isinstance(c, Token): res.meta.end_line = c.end_line res.meta.end_column = c.end_column - res.meta.end_pos = c.pos_in_stream + len(c.value) + res.meta.end_pos = c.end_pos res.meta.empty = False break