This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

341 lines
9.5 KiB

  1. import unicodedata
  2. import os
  3. from functools import reduce
  4. from collections import deque
  5. ###{standalone
  6. import sys, re
  7. import logging
# Library-wide logger. A StreamHandler is attached so messages are visible
# even if the host application never configures logging itself.
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

# True when running on Python 3.6 or newer.
Py36 = (sys.version_info[:2] >= (3, 6))

# Unique sentinel, distinct from None, for "no value was supplied" checks.
NO_VALUE = object()
  15. def classify(seq, key=None, value=None):
  16. d = {}
  17. for item in seq:
  18. k = key(item) if (key is not None) else item
  19. v = value(item) if (value is not None) else item
  20. if k in d:
  21. d[k].append(v)
  22. else:
  23. d[k] = [v]
  24. return d
  25. def _deserialize(data, namespace, memo):
  26. if isinstance(data, dict):
  27. if '__type__' in data: # Object
  28. class_ = namespace[data['__type__']]
  29. return class_.deserialize(data, memo)
  30. elif '@' in data:
  31. return memo[data['@']]
  32. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  33. elif isinstance(data, list):
  34. return [_deserialize(value, namespace, memo) for value in data]
  35. return data
  36. class Serialize(object):
  37. """Safe-ish serialization interface that doesn't rely on Pickle
  38. Attributes:
  39. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  40. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  41. Should include all field types that aren't builtin types.
  42. """
  43. def memo_serialize(self, types_to_memoize):
  44. memo = SerializeMemoizer(types_to_memoize)
  45. return self.serialize(memo), memo.serialize()
  46. def serialize(self, memo=None):
  47. if memo and memo.in_types(self):
  48. return {'@': memo.memoized.get(self)}
  49. fields = getattr(self, '__serialize_fields__')
  50. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  51. res['__type__'] = type(self).__name__
  52. postprocess = getattr(self, '_serialize', None)
  53. if postprocess:
  54. postprocess(res, memo)
  55. return res
  56. @classmethod
  57. def deserialize(cls, data, memo):
  58. namespace = getattr(cls, '__serialize_namespace__', {})
  59. namespace = {c.__name__:c for c in namespace}
  60. fields = getattr(cls, '__serialize_fields__')
  61. if '@' in data:
  62. return memo[data['@']]
  63. inst = cls.__new__(cls)
  64. for f in fields:
  65. try:
  66. setattr(inst, f, _deserialize(data[f], namespace, memo))
  67. except KeyError as e:
  68. raise KeyError("Cannot find key for class", cls, e)
  69. postprocess = getattr(inst, '_deserialize', None)
  70. if postprocess:
  71. postprocess()
  72. return inst
  73. class SerializeMemoizer(Serialize):
  74. "A version of serialize that memoizes objects to reduce space"
  75. __serialize_fields__ = 'memoized',
  76. def __init__(self, types_to_memoize):
  77. self.types_to_memoize = tuple(types_to_memoize)
  78. self.memoized = Enumerator()
  79. def in_types(self, value):
  80. return isinstance(value, self.types_to_memoize)
  81. def serialize(self):
  82. return _serialize(self.memoized.reversed(), None)
  83. @classmethod
  84. def deserialize(cls, data, namespace, memo):
  85. return _deserialize(data, namespace, memo)
# `basestring` only exists on Python 2; fall back to `str` on Python 3.
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str

import types
from functools import wraps, partial
from contextlib import contextmanager

# The unicode text type on both major versions: `unicode` on 2, `str` on 3.
Str = type(u'')

# Old-style class type on Python 2; plain `type` on Python 3.
try:
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type    # Python3
def smart_decorator(f, create_decorator):
    """Wrap an arbitrary callable `f` via `create_decorator(func, flag)`.

    Dispatches on what kind of callable `f` is (plain function, class/builtin,
    bound method, `functools.partial`, or callable object) and applies
    `functools.wraps` where possible so the wrapper keeps `f`'s metadata.

    NOTE(review): the boolean passed to `create_decorator` appears to indicate
    whether the callable carries an implicit first argument (True for plain
    functions/methods, False for classes and builtins) — confirm at call sites.
    """
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))
    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))
    elif isinstance(f, types.MethodType):
        # Decorate the underlying function of a bound method.
        return wraps(f)(create_decorator(f.__func__, True))
    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
    else:
        # Fallback: assume a callable object and decorate its __call__.
        return create_decorator(f.__func__.__call__, True)
# Optional `regex` module: a drop-in `re` replacement that supports Unicode
# category escapes such as \p{Lu}. Absence is tolerated (regex = None).
try:
    import regex
except ImportError:
    regex = None

import sre_parse
import sre_constants

# Matches Unicode-category escapes of the form \p{Name} inside a pattern.
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
  117. def get_regexp_width(expr):
  118. if regex:
  119. # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
  120. # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
  121. # match here below.
  122. regexp_final = re.sub(categ_pattern, 'A', expr)
  123. else:
  124. if re.search(categ_pattern, expr):
  125. raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
  126. regexp_final = expr
  127. try:
  128. return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
  129. except sre_constants.error:
  130. raise ValueError(expr)
  131. ###}
# Unicode general categories accepted at the start of an identifier
# ('_' is accepted separately in _test_unicode_category).
_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
# Categories accepted after the first character: the start set plus digits.
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
  134. def _test_unicode_category(s, categories):
  135. if len(s) != 1:
  136. return all(_test_unicode_category(char, categories) for char in s)
  137. return s == '_' or unicodedata.category(s) in categories
  138. def is_id_continue(s):
  139. """
  140. Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, indian vowels, non-latin
  141. numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
  142. """
  143. return _test_unicode_category(s, _ID_CONTINUE)
  144. def is_id_start(s):
  145. """
  146. Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, indian vowels, non-latin
  147. numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
  148. """
  149. return _test_unicode_category(s, _ID_START)
  150. def dedup_list(l):
  151. """Given a list (l) will removing duplicates from the list,
  152. preserving the original order of the list. Assumes that
  153. the list entries are hashable."""
  154. dedup = set()
  155. return [x for x in l if not (x in dedup or dedup.add(x))]
try:
    from contextlib import suppress     # Python 3
except ImportError:
    # Backport for Python 2: a context manager that swallows the listed
    # exception types and nothing else.
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
# Python 2's three-way `cmp` builtin; re-implemented for Python 3.
try:
    compare = cmp
except NameError:
    def compare(a, b):
        # Classic cmp contract: 0 if equal, 1 if a > b, -1 otherwise.
        if a == b:
            return 0
        elif a > b:
            return 1
        return -1
  181. class Enumerator(Serialize):
  182. def __init__(self):
  183. self.enums = {}
  184. def get(self, item):
  185. if item not in self.enums:
  186. self.enums[item] = len(self.enums)
  187. return self.enums[item]
  188. def __len__(self):
  189. return len(self.enums)
  190. def reversed(self):
  191. r = {v: k for k, v in self.enums.items()}
  192. assert len(r) == len(self.enums)
  193. return r
  194. def combine_alternatives(lists):
  195. """
  196. Accepts a list of alternatives, and enumerates all their possible concatinations.
  197. Examples:
  198. >>> combine_alternatives([range(2), [4,5]])
  199. [[0, 4], [0, 5], [1, 4], [1, 5]]
  200. >>> combine_alternatives(["abc", "xy", '$'])
  201. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  202. >>> combine_alternatives([])
  203. [[]]
  204. """
  205. if not lists:
  206. return [[]]
  207. assert all(l for l in lists), lists
  208. init = [[x] for x in lists[0]]
  209. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
class FS:
    # Thin namespace over the builtin `open` and `os.path.exists`.
    open = open
    exists = os.path.exists
  213. def isascii(s):
  214. """ str.isascii only exists in python3.7+ """
  215. try:
  216. return s.isascii()
  217. except AttributeError:
  218. try:
  219. s.encode('ascii')
  220. return True
  221. except (UnicodeDecodeError, UnicodeEncodeError):
  222. return False
  223. class fzset(frozenset):
  224. def __repr__(self):
  225. return '{%s}' % ', '.join(map(repr, self))
  226. def classify_bool(seq, pred):
  227. true_elems = []
  228. false_elems = []
  229. for elem in seq:
  230. if pred(elem):
  231. true_elems.append(elem)
  232. else:
  233. false_elems.append(elem)
  234. return true_elems, false_elems
  235. def bfs(initial, expand):
  236. open_q = deque(list(initial))
  237. visited = set(open_q)
  238. while open_q:
  239. node = open_q.popleft()
  240. yield node
  241. for next_node in expand(node):
  242. if next_node not in visited:
  243. visited.add(next_node)
  244. open_q.append(next_node)
  245. def bfs_all_unique(initial, expand):
  246. "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
  247. open_q = deque(list(initial))
  248. while open_q:
  249. node = open_q.popleft()
  250. yield node
  251. open_q += expand(node)
  252. def _serialize(value, memo):
  253. if isinstance(value, Serialize):
  254. return value.serialize(memo)
  255. elif isinstance(value, list):
  256. return [_serialize(elem, memo) for elem in value]
  257. elif isinstance(value, frozenset):
  258. return list(value) # TODO reversible?
  259. elif isinstance(value, dict):
  260. return {key:_serialize(elem, memo) for key, elem in value.items()}
  261. # assert value is None or isinstance(value, (int, float, str, tuple)), value
  262. return value