#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#


from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token


class TypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        # Tag each object in the stream with a terminal name, based on its type
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)


# The grammar only declares its terminals; their "patterns" are the type checks
# in TypeLexer above, so %declare just introduces the terminal names.
parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        # Each data_item becomes a (name, list-of-numbers) pair
        return name.value, [n.value for n in numbers]

    # The start rule receives the list of pairs and turns it into a dict
    start = dict


def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]

    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)  # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}
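

# A variation on the same idea (a sketch, not part of the original example):
# the pattern scales to more object types by declaring more terminals and
# adding the matching type checks in the lexer. ExtendedTypeLexer,
# extended_parser and the FLOAT terminal are illustrative names introduced
# here; they are not defined anywhere else in Lark.

class ExtendedTypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, float):
                yield Token('FLOAT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)


extended_parser = Lark("""
        start: data_item+
        data_item: STR (INT | FLOAT)*

        %declare STR INT FLOAT
        """, parser='lalr', lexer=ExtendedTypeLexer)

# Example usage, mirroring test() above:
#
#     ParseToDict().transform(extended_parser.parse(['pi', 3.14, 'answer', 42]))
#     # -> {'pi': [3.14], 'answer': [42]}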


if __name__ == '__main__':
    test()