This repo contains code to mirror other repos, as well as the code that is being mirrored.


#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#
from lark import Lark, Transformer, v_args
from lark.lexer import Lexer, Token
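
# A custom lexer subclasses lark.lexer.Lexer and implements lex(), yielding
# Token objects. Here the input is not text but a stream of Python objects;
# each object is classified into one of the terminals declared in the grammar
# below. (The `(type(''), type(u''))` check is a Python 2/3 compatible way of
# matching strings; on Python 3 both expressions are simply `str`.)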
class TypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, (type(''), type(u''))):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)
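
# %declare introduces terminals without giving them a pattern: the grammar
# only needs their names, since the custom lexer (not Lark) is responsible
# for producing the STR and INT tokens.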
parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)
class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        return name.value, [n.value for n in numbers]

    start = dict
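
# Parse a heterogeneous list: each string starts a new entry, and any
# integers that follow are attached to it.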
def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]

    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)  # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()
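
The same pattern extends to any object type the lexer can distinguish. The sketch below is not part of the original file: it adds a FLOAT terminal alongside INT and STR, with an illustrative NumberLexer class and grammar, assuming the same custom-lexer interface the example above uses.

# A minimal sketch, not part of the original example: extending the custom
# lexer with a FLOAT terminal. NumberLexer and number_parser are illustrative
# names introduced here, not part of Lark or the original file.
from lark import Lark
from lark.lexer import Lexer, Token

class NumberLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            # bool is a subclass of int, so reject it explicitly first
            if isinstance(obj, bool):
                raise TypeError(obj)
            elif isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, float):
                yield Token('FLOAT', obj)
            elif isinstance(obj, str):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)

number_parser = Lark("""
        start: data_item+
        data_item: STR (INT | FLOAT)*

        %declare STR INT FLOAT
        """, parser='lalr', lexer=NumberLexer)

# e.g. number_parser.parse(['pi', 3.14, 'answer', 42]) produces a tree whose
# data_item children carry the original float and int objects as token values.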