This repo contains code to mirror other repos, as well as the code that is being mirrored.

333 lines
9.3 KiB

import unicodedata
import os
from functools import reduce
from collections import deque

###{standalone
import sys, re
import logging
logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

Py36 = (sys.version_info[:2] >= (3, 6))

NO_VALUE = object()


def classify(seq, key=None, value=None):
    d = {}
    for item in seq:
        k = key(item) if (key is not None) else item
        v = value(item) if (value is not None) else item
        if k in d:
            d[k].append(v)
        else:
            d[k] = [v]
    return d
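# Illustrative usage (not part of the original file): `classify` groups a
# sequence into a dict keyed by the given key function; the sample words below
# are made up.
#   >>> classify(['apple', 'avocado', 'banana'], key=lambda w: w[0])
#   {'a': ['apple', 'avocado'], 'b': ['banana']}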
def _deserialize(data, namespace, memo):
    if isinstance(data, dict):
        if '__type__' in data:  # Object
            class_ = namespace[data['__type__']]
            return class_.deserialize(data, memo)
        elif '@' in data:
            return memo[data['@']]
        return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
    elif isinstance(data, list):
        return [_deserialize(value, namespace, memo) for value in data]
    return data


class Serialize(object):
    """Safe-ish serialization interface that doesn't rely on Pickle

    Attributes:
        __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
        __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
                                        Should include all field types that aren't builtin types.
    """

    def memo_serialize(self, types_to_memoize):
        memo = SerializeMemoizer(types_to_memoize)
        return self.serialize(memo), memo.serialize()

    def serialize(self, memo=None):
        if memo and memo.in_types(self):
            return {'@': memo.memoized.get(self)}

        fields = getattr(self, '__serialize_fields__')
        res = {f: _serialize(getattr(self, f), memo) for f in fields}
        res['__type__'] = type(self).__name__
        postprocess = getattr(self, '_serialize', None)
        if postprocess:
            postprocess(res, memo)
        return res

    @classmethod
    def deserialize(cls, data, memo):
        namespace = getattr(cls, '__serialize_namespace__', {})
        namespace = {c.__name__:c for c in namespace}

        fields = getattr(cls, '__serialize_fields__')

        if '@' in data:
            return memo[data['@']]

        inst = cls.__new__(cls)
        for f in fields:
            try:
                setattr(inst, f, _deserialize(data[f], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()
        return inst


class SerializeMemoizer(Serialize):
    "A version of serialize that memoizes objects to reduce space"

    __serialize_fields__ = 'memoized',

    def __init__(self, types_to_memoize):
        self.types_to_memoize = tuple(types_to_memoize)
        self.memoized = Enumerator()

    def in_types(self, value):
        return isinstance(value, self.types_to_memoize)

    def serialize(self):
        return _serialize(self.memoized.reversed(), None)

    @classmethod
    def deserialize(cls, data, namespace, memo):
        return _deserialize(data, namespace, memo)
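# Illustrative usage (not part of the original file): a minimal, hypothetical
# Serialize subclass showing a serialize/deserialize round trip without a memo.
#   class Point(Serialize):
#       __serialize_fields__ = 'x', 'y'
#       def __init__(self, x, y):
#           self.x, self.y = x, y
#   data = Point(1, 2).serialize()     # {'x': 1, 'y': 2, '__type__': 'Point'}
#   p = Point.deserialize(data, {})    # new Point with p.x == 1, p.y == 2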
try:
    STRING_TYPE = basestring
except NameError:   # Python 3
    STRING_TYPE = str


import types
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')
try:
    classtype = types.ClassType  # Python2
except AttributeError:
    classtype = type    # Python3


def smart_decorator(f, create_decorator):
    if isinstance(f, types.FunctionType):
        return wraps(f)(create_decorator(f, True))

    elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
        return wraps(f)(create_decorator(f, False))

    elif isinstance(f, types.MethodType):
        return wraps(f)(create_decorator(f.__func__, True))

    elif isinstance(f, partial):
        # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
        return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))

    else:
        return create_decorator(f.__func__.__call__, True)
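# Illustrative usage (not part of the original file): `smart_decorator` applies a
# decorator factory to plain functions, methods, builtins, classes and partials
# alike. `counting` below is a hypothetical factory; its second argument is the
# flag that smart_decorator passes for the kind of callable being wrapped.
#   def counting(func, with_self):
#       def wrapper(*args, **kwargs):
#           wrapper.calls += 1
#           return func(*args, **kwargs)
#       wrapper.calls = 0
#       return wrapper
#   double = smart_decorator(lambda x: 2 * x, counting)
#   double(21)          # -> 42, and double.calls == 1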
try:
    import regex
except ImportError:
    regex = None

import sre_parse
import sre_constants
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
def get_regexp_width(expr):
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        raise ValueError(expr)
###}
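# Illustrative usage (not part of the original file): `get_regexp_width` returns
# the minimum and maximum number of characters a pattern can match.
#   >>> get_regexp_width(r'ab?c')
#   [2, 3]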
_ID_START =    'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)

def _test_unicode_category(s, categories):
    if len(s) != 1:
        return all(_test_unicode_category(char, categories) for char in s)
    return s == '_' or unicodedata.category(s) in categories

def is_id_continue(s):
    """
    Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, Indian vowels, non-Latin
    numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_CONTINUE)

def is_id_start(s):
    """
    Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, Indian vowels, non-Latin
    numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
    """
    return _test_unicode_category(s, _ID_START)
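# Illustrative usage (not part of the original file):
#   >>> is_id_start('変数'), is_id_start('1x')
#   (True, False)
#   >>> is_id_continue('var_1')
#   True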
def dedup_list(l):
    """Given a list (l), removes duplicates from the list,
       preserving the original order of the list. Assumes that
       the list entries are hashable."""
    dedup = set()
    return [x for x in l if not (x in dedup or dedup.add(x))]
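# Illustrative usage (not part of the original file):
#   dedup_list([1, 2, 1, 3, 2])   # -> [1, 2, 3]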
try:
    from contextlib import suppress     # Python 3
except ImportError:
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass


try:
    compare = cmp
except NameError:
    def compare(a, b):
        if a == b:
            return 0
        elif a > b:
            return 1
        return -1


class Enumerator(Serialize):
    def __init__(self):
        self.enums = {}

    def get(self, item):
        if item not in self.enums:
            self.enums[item] = len(self.enums)
        return self.enums[item]

    def __len__(self):
        return len(self.enums)

    def reversed(self):
        r = {v: k for k, v in self.enums.items()}
        assert len(r) == len(self.enums)
        return r
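# Illustrative usage (not part of the original file): `Enumerator` hands out a
# stable integer id per distinct item.
#   e = Enumerator()
#   e.get('A'), e.get('B'), e.get('A')   # -> (0, 1, 0)
#   e.reversed()                         # -> {0: 'A', 1: 'B'}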
def combine_alternatives(lists):
    """
    Accepts a list of alternatives, and enumerates all their possible concatenations.

    Examples:
        >>> combine_alternatives([range(2), [4,5]])
        [[0, 4], [0, 5], [1, 4], [1, 5]]

        >>> combine_alternatives(["abc", "xy", '$'])
        [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]

        >>> combine_alternatives([])
        [[]]
    """
    if not lists:
        return [[]]
    assert all(l for l in lists), lists
    init = [[x] for x in lists[0]]
    return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
class FS:
    open = open
    exists = os.path.exists


def isascii(s):
    """ str.isascii only exists in python3.7+ """
    try:
        return s.isascii()
    except AttributeError:
        try:
            s.encode('ascii')
            return True
        except (UnicodeDecodeError, UnicodeEncodeError):
            return False
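# Illustrative usage (not part of the original file): behaves the same on Python
# versions with and without str.isascii.
#   isascii('lark')    # -> True
#   isascii('café')    # -> False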
class fzset(frozenset):
    def __repr__(self):
        return '{%s}' % ', '.join(map(repr, self))


def classify_bool(seq, pred):
    true_elems = []
    false_elems = []

    for elem in seq:
        if pred(elem):
            true_elems.append(elem)
        else:
            false_elems.append(elem)

    return true_elems, false_elems
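# Illustrative usage (not part of the original file): splits a sequence by a
# predicate into (matching, non-matching).
#   classify_bool(range(5), lambda n: n % 2 == 0)   # -> ([0, 2, 4], [1, 3])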
def bfs(initial, expand):
    open_q = deque(list(initial))
    visited = set(open_q)
    while open_q:
        node = open_q.popleft()
        yield node
        for next_node in expand(node):
            if next_node not in visited:
                visited.add(next_node)
                open_q.append(next_node)
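# Illustrative usage (not part of the original file): breadth-first traversal of
# a small, made-up graph given as an adjacency dict.
#   graph = {'a': ['b', 'c'], 'b': ['d'], 'c': ['d']}
#   list(bfs(['a'], lambda n: graph.get(n, [])))   # -> ['a', 'b', 'c', 'd']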
def _serialize(value, memo):
    if isinstance(value, Serialize):
        return value.serialize(memo)
    elif isinstance(value, list):
        return [_serialize(elem, memo) for elem in value]
    elif isinstance(value, frozenset):
        return list(value)  # TODO reversible?
    elif isinstance(value, dict):
        return {key:_serialize(elem, memo) for key, elem in value.items()}
    # assert value is None or isinstance(value, (int, float, str, tuple)), value
    return value