#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#

from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token


class TypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        # `data` is whatever was passed to parser.parse() -- here, a list of
        # Python objects rather than a string.
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)


# %declare introduces terminals without defining patterns for them;
# the custom lexer is responsible for producing the STR and INT tokens.
parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        # Each data_item becomes a (name, numbers) pair;
        # `start = dict` then collects the pairs into a dict.
        return name.value, [n.value for n in numbers]

    start = dict


def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)  # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()
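
#
# Variant: carrying position information (a sketch, not part of the original
# example). Lark's Token constructor also accepts optional positional fields
# such as start_pos, so a custom lexer can record where in the object stream
# each token came from, which makes parse errors easier to trace back to the
# offending object. The class name PosTypeLexer and the choice to store the
# stream index in start_pos are illustrative assumptions, not an established
# Lark convention.
#

class PosTypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        # enumerate() supplies each object's index in the stream,
        # which we record as the token's start_pos.
        for i, obj in enumerate(data):
            if isinstance(obj, int):
                yield Token('INT', obj, start_pos=i)
            elif isinstance(obj, str):
                yield Token('STR', obj, start_pos=i)
            else:
                raise TypeError('Cannot tokenize object at index %d: %r' % (i, obj))

# To try it, construct a parser the same way as above but with
# lexer=PosTypeLexer, e.g.:
#
#   pos_parser = Lark("""
#           start: data_item+
#           data_item: STR INT*
#
#           %declare STR INT
#           """, parser='lalr', lexer=PosTypeLexer)
#   pos_parser.parse(['alice', 1, 27, 3])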