#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#
from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token


class TypeLexer(Lexer):
    def __init__(self, lexer_conf):
        # Nothing to configure: the grammar declares the terminals with %declare.
        pass

    def lex(self, data):
        # Emit a Token for each object, chosen by its Python type.
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)
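
# A quick sanity check of the lexer on its own. `lexer_conf` is unused above,
# so passing None here is only a convenience of this sketch:
#
#     >>> [(t.type, t.value) for t in TypeLexer(None).lex(['a', 1])]
#     [('STR', 'a'), ('INT', 1)]
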
parser = Lark("""
        start: data_item+
        data_item: STR INT*
        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        # Each data_item becomes a (name, [numbers]) pair.
        return name.value, [n.value for n in numbers]

    start = dict
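
# `start = dict` makes the start rule's callback dict() itself: it receives
# the list of (name, numbers) pairs and builds the final mapping.
# A sketch, assuming the input ['alice', 1, 27]:
#     data_item -> ('alice', [1, 27])
#     start     -> dict([('alice', [1, 27])]) == {'alice': [1, 27]}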


def test():
    # Note that 'carrie' is followed directly by 'dan', so she gets an empty list.
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
    print(data)
    tree = parser.parse(data)
    res = ParseToDict().transform(tree)
    print('-->')
    print(res) # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()