This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
2.5 KiB

"""
Simple JSON Parser
==================

A simple JSON parser (comes with a tutorial, see docs).

The code is short and clear, and outperforms every other parser (that's written in Python).
For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md
"""
  8. import sys
  9. from lark import Lark, Transformer, v_args
  10. json_grammar = r"""
  11. ?start: value
  12. ?value: object
  13. | array
  14. | string
  15. | SIGNED_NUMBER -> number
  16. | "true" -> true
  17. | "false" -> false
  18. | "null" -> null
  19. array : "[" [value ("," value)*] "]"
  20. object : "{" [pair ("," pair)*] "}"
  21. pair : string ":" value
  22. string : ESCAPED_STRING
  23. %import common.ESCAPED_STRING
  24. %import common.SIGNED_NUMBER
  25. %import common.WS
  26. %ignore WS
  27. """
  28. class TreeToJson(Transformer):
  29. @v_args(inline=True)
  30. def string(self, s):
  31. return s[1:-1].replace('\\"', '"')
  32. array = list
  33. pair = tuple
  34. object = dict
  35. number = v_args(inline=True)(float)
  36. null = lambda self, _: None
  37. true = lambda self, _: True
  38. false = lambda self, _: False
  39. ### Create the JSON parser with Lark, using the Earley algorithm
  40. # json_parser = Lark(json_grammar, parser='earley', lexer='standard')
  41. # def parse(x):
  42. # return TreeToJson().transform(json_parser.parse(x))
  43. ### Create the JSON parser with Lark, using the LALR algorithm
  44. json_parser = Lark(json_grammar, parser='lalr',
  45. # Using the standard lexer isn't required, and isn't usually recommended.
  46. # But, it's good enough for JSON, and it's slightly faster.
  47. lexer='standard',
  48. # Disabling propagate_positions and placeholders slightly improves speed
  49. propagate_positions=False,
  50. maybe_placeholders=False,
  51. # Using an internal transformer is faster and more memory efficient
  52. transformer=TreeToJson())
  53. parse = json_parser.parse
  54. def test():
  55. test_json = '''
  56. {
  57. "empty_object" : {},
  58. "empty_array" : [],
  59. "booleans" : { "YES" : true, "NO" : false },
  60. "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
  61. "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
  62. "nothing" : null
  63. }
  64. '''
  65. j = parse(test_json)
  66. print(j)
  67. import json
  68. assert j == json.loads(test_json)
  69. if __name__ == '__main__':
  70. # test()
  71. with open(sys.argv[1]) as f:
  72. print(parse(f.read()))