This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

55 lines
1.3 KiB

#
# This example demonstrates using Lark with a custom lexer.
#
# You can use a custom lexer to tokenize text when the lexers offered by Lark
# are too slow, or not flexible enough.
#
# You can also use it (as shown in this example) to tokenize streams of objects.
#
  9. from lark import Lark, Transformer, v_args
  10. from lark.lexer import Lexer, Token
  11. class TypeLexer(Lexer):
  12. def __init__(self, lexer_conf):
  13. pass
  14. def lex(self, data):
  15. print(data)
  16. for obj in data:
  17. if isinstance(obj, int):
  18. yield Token('INT', obj)
  19. elif isinstance(obj, (type(''), type(u''))):
  20. yield Token('STR', obj)
  21. else:
  22. raise TypeError(obj)
  23. parser = Lark("""
  24. start: data_item+
  25. data_item: STR INT*
  26. %declare STR INT
  27. """, parser='lalr', lexer=TypeLexer)
  28. class ParseToDict(Transformer):
  29. @v_args(inline=True)
  30. def data_item(self, name, *numbers):
  31. return name.value, [n.value for n in numbers]
  32. start = dict
  33. def test():
  34. data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
  35. tree = parser.parse(data)
  36. res = ParseToDict().transform(tree)
  37. print(res) # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}
  38. if __name__ == '__main__':
  39. test()