  1. """
  2. Custom lexer
  3. ============
  4. Demonstrates using a custom lexer to parse a non-textual stream of data
  5. You can use a custom lexer to tokenize text when the lexers offered by Lark
  6. are too slow, or not flexible enough.
  7. You can also use it (as shown in this example) to tokenize streams of objects.
  8. """
from lark import Lark, Transformer, v_args

from lark.lexer import Lexer, Token
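

# A custom lexer subclasses lark.lexer.Lexer and implements lex(). Here,
# lex() receives the same object that is later handed to parser.parse()
# and yields Token instances for the parser to consume.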
class TypeLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for obj in data:
            if isinstance(obj, int):
                yield Token('INT', obj)
            elif isinstance(obj, (type(''), type(u''))):
                yield Token('STR', obj)
            else:
                raise TypeError(obj)
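
# For example, the stream ['a', 1, 2] is lexed into Token('STR', 'a'),
# Token('INT', 1), Token('INT', 2); any other object type is rejected
# with a TypeError.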

parser = Lark("""
        start: data_item+
        data_item: STR INT*

        %declare STR INT
        """, parser='lalr', lexer=TypeLexer)


class ParseToDict(Transformer):
    @v_args(inline=True)
    def data_item(self, name, *numbers):
        return name.value, [n.value for n in numbers]

    start = dict
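
# v_args(inline=True) passes the children of each data_item as separate
# arguments: the STR token first, then its INT tokens. Each data_item
# becomes a (name, numbers) pair, and `start = dict` collects the pairs
# into a dictionary.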


def test():
    data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
    print(data)

    tree = parser.parse(data)
    res = ParseToDict().transform(tree)

    print('-->')
    print(res)    # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}


if __name__ == '__main__':
    test()
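
# Expected output when run:
#
#   ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6]
#   -->
#   {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]}
#
# Note that 'carrie' maps to [] because the grammar allows zero INTs per
# data_item. A minimal reuse sketch (assuming the same parser and
# transformer): any iterable of str/int objects works, e.g.
#
#   tree = parser.parse(['eve', 5, 9])
#   ParseToDict().transform(tree)   # -> {'eve': [5, 9]}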