This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

362 lines
10 KiB

  1. import hashlib
  2. import unicodedata
  3. import os
  4. from functools import reduce
  5. from collections import deque
  6. ###{standalone
  7. import sys, re
  8. import logging
  9. from io import open
  10. logger = logging.getLogger("lark")
  11. logger.addHandler(logging.StreamHandler())
  12. # Set to highest level, since we have some warnings amongst the code
  13. # By default, we should not output any log messages
  14. logger.setLevel(logging.CRITICAL)
  15. if sys.version_info[0]>2:
  16. from abc import ABC, abstractmethod
  17. else:
  18. from abc import ABCMeta, abstractmethod
  19. class ABC(object): # Provide Python27 compatibility
  20. __slots__ = ()
  21. __metclass__ = ABCMeta
  22. Py36 = (sys.version_info[:2] >= (3, 6))
  23. NO_VALUE = object()
  24. def classify(seq, key=None, value=None):
  25. d = {}
  26. for item in seq:
  27. k = key(item) if (key is not None) else item
  28. v = value(item) if (value is not None) else item
  29. if k in d:
  30. d[k].append(v)
  31. else:
  32. d[k] = [v]
  33. return d
  34. def _deserialize(data, namespace, memo):
  35. if isinstance(data, dict):
  36. if '__type__' in data: # Object
  37. class_ = namespace[data['__type__']]
  38. return class_.deserialize(data, memo)
  39. elif '@' in data:
  40. return memo[data['@']]
  41. return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
  42. elif isinstance(data, list):
  43. return [_deserialize(value, namespace, memo) for value in data]
  44. return data
  45. class Serialize(object):
  46. """Safe-ish serialization interface that doesn't rely on Pickle
  47. Attributes:
  48. __serialize_fields__ (List[str]): Fields (aka attributes) to serialize.
  49. __serialize_namespace__ (list): List of classes that deserialization is allowed to instantiate.
  50. Should include all field types that aren't builtin types.
  51. """
  52. def memo_serialize(self, types_to_memoize):
  53. memo = SerializeMemoizer(types_to_memoize)
  54. return self.serialize(memo), memo.serialize()
  55. def serialize(self, memo=None):
  56. if memo and memo.in_types(self):
  57. return {'@': memo.memoized.get(self)}
  58. fields = getattr(self, '__serialize_fields__')
  59. res = {f: _serialize(getattr(self, f), memo) for f in fields}
  60. res['__type__'] = type(self).__name__
  61. postprocess = getattr(self, '_serialize', None)
  62. if postprocess:
  63. postprocess(res, memo)
  64. return res
  65. @classmethod
  66. def deserialize(cls, data, memo):
  67. namespace = getattr(cls, '__serialize_namespace__', {})
  68. namespace = {c.__name__:c for c in namespace}
  69. fields = getattr(cls, '__serialize_fields__')
  70. if '@' in data:
  71. return memo[data['@']]
  72. inst = cls.__new__(cls)
  73. for f in fields:
  74. try:
  75. setattr(inst, f, _deserialize(data[f], namespace, memo))
  76. except KeyError as e:
  77. raise KeyError("Cannot find key for class", cls, e)
  78. postprocess = getattr(inst, '_deserialize', None)
  79. if postprocess:
  80. postprocess()
  81. return inst
  82. class SerializeMemoizer(Serialize):
  83. "A version of serialize that memoizes objects to reduce space"
  84. __serialize_fields__ = 'memoized',
  85. def __init__(self, types_to_memoize):
  86. self.types_to_memoize = tuple(types_to_memoize)
  87. self.memoized = Enumerator()
  88. def in_types(self, value):
  89. return isinstance(value, self.types_to_memoize)
  90. def serialize(self):
  91. return _serialize(self.memoized.reversed(), None)
  92. @classmethod
  93. def deserialize(cls, data, namespace, memo):
  94. return _deserialize(data, namespace, memo)
# Base string type: `basestring` on Python 2, `str` on Python 3.
try:
    STRING_TYPE = basestring
except NameError: # Python 3
    STRING_TYPE = str
import types
from functools import wraps, partial
from contextlib import contextmanager
# The native unicode text type on either Python version.
Str = type(u'')
# Old-style classes only exist on Python 2; on Python 3 fall back to `type`.
try:
    classtype = types.ClassType # Python2
except AttributeError:
    classtype = type # Python3
  107. def smart_decorator(f, create_decorator):
  108. if isinstance(f, types.FunctionType):
  109. return wraps(f)(create_decorator(f, True))
  110. elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
  111. return wraps(f)(create_decorator(f, False))
  112. elif isinstance(f, types.MethodType):
  113. return wraps(f)(create_decorator(f.__func__, True))
  114. elif isinstance(f, partial):
  115. # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445
  116. return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True))
  117. else:
  118. return create_decorator(f.__func__.__call__, True)
# Optional third-party `regex` module (superset of `re` with Unicode
# category support); None when not installed.
try:
    import regex
except ImportError:
    regex = None
import sre_parse
import sre_constants
# Matches a literal Unicode-category escape such as `\p{Mn}` inside a
# pattern string (note the doubled backslash: the input is pattern text).
categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
def get_regexp_width(expr):
    """Return the (min, max) match width of the regex pattern `expr`.

    Raises ImportError if `expr` uses `\\p{...}` categories and the `regex`
    module is unavailable, and ValueError if `expr` cannot be parsed at all.

    NOTE(review): the normal path returns a list [min, max] while the
    regex-fallback path returns a tuple — callers apparently only unpack,
    so both work, but the types differ.
    """
    if regex:
        # Since `sre_parse` cannot deal with Unicode categories of the form `\p{Mn}`, we replace these with
        # a simple letter, which makes no difference as we are only trying to get the possible lengths of the regex
        # match here below.
        regexp_final = re.sub(categ_pattern, 'A', expr)
    else:
        if re.search(categ_pattern, expr):
            raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
        regexp_final = expr
    try:
        # getwidth() may return sre_constants.MAXREPEAT objects; int() normalizes them.
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        if not regex:
            raise ValueError(expr)
        else:
            # sre_parse does not support the new features in regex. To not completely fail in that case,
            # we manually test for the most important info (whether the empty string is matched)
            c = regex.compile(regexp_final)
            if c.match('') is None:
                return 1, sre_constants.MAXREPEAT
            else:
                return 0, sre_constants.MAXREPEAT
  149. ###}
# Unicode general categories allowed at the start of an identifier
# (letters plus the marks/connector-punctuation this module accepts).
_ID_START = 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Pc'
# Continuation characters additionally allow decimal and letter numbers.
_ID_CONTINUE = _ID_START + ('Nd', 'Nl',)
  152. def _test_unicode_category(s, categories):
  153. if len(s) != 1:
  154. return all(_test_unicode_category(char, categories) for char in s)
  155. return s == '_' or unicodedata.category(s) in categories
  156. def is_id_continue(s):
  157. """
  158. Checks if all characters in `s` are alphanumeric characters (Unicode standard, so diacritics, indian vowels, non-latin
  159. numbers, etc. all pass). Synonymous with a Python `ID_CONTINUE` identifier. See PEP 3131 for details.
  160. """
  161. return _test_unicode_category(s, _ID_CONTINUE)
  162. def is_id_start(s):
  163. """
  164. Checks if all characters in `s` are alphabetic characters (Unicode standard, so diacritics, indian vowels, non-latin
  165. numbers, etc. all pass). Synonymous with a Python `ID_START` identifier. See PEP 3131 for details.
  166. """
  167. return _test_unicode_category(s, _ID_START)
  168. def dedup_list(l):
  169. """Given a list (l) will removing duplicates from the list,
  170. preserving the original order of the list. Assumes that
  171. the list entries are hashable."""
  172. dedup = set()
  173. return [x for x in l if not (x in dedup or dedup.add(x))]
try:
    from contextlib import suppress # Python 3
except ImportError:
    # Python 2 fallback: minimal reimplementation of contextlib.suppress.
    @contextmanager
    def suppress(*excs):
        '''Catch and dismiss the provided exception
        >>> x = 'hello'
        >>> with suppress(IndexError):
        ...     x = x[10]
        >>> x
        'hello'
        '''
        try:
            yield
        except excs:
            pass
  190. class Enumerator(Serialize):
  191. def __init__(self):
  192. self.enums = {}
  193. def get(self, item):
  194. if item not in self.enums:
  195. self.enums[item] = len(self.enums)
  196. return self.enums[item]
  197. def __len__(self):
  198. return len(self.enums)
  199. def reversed(self):
  200. r = {v: k for k, v in self.enums.items()}
  201. assert len(r) == len(self.enums)
  202. return r
  203. def combine_alternatives(lists):
  204. """
  205. Accepts a list of alternatives, and enumerates all their possible concatinations.
  206. Examples:
  207. >>> combine_alternatives([range(2), [4,5]])
  208. [[0, 4], [0, 5], [1, 4], [1, 5]]
  209. >>> combine_alternatives(["abc", "xy", '$'])
  210. [['a', 'x', '$'], ['a', 'y', '$'], ['b', 'x', '$'], ['b', 'y', '$'], ['c', 'x', '$'], ['c', 'y', '$']]
  211. >>> combine_alternatives([])
  212. [[]]
  213. """
  214. if not lists:
  215. return [[]]
  216. assert all(l for l in lists), lists
  217. init = [[x] for x in lists[0]]
  218. return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init)
# Optional dependency used for crash-safe file writes; None when absent.
try:
    import atomicwrites
except ImportError:
    atomicwrites = None
  223. class FS:
  224. exists = os.path.exists
  225. @staticmethod
  226. def open(name, mode="r", **kwargs):
  227. if atomicwrites and "w" in mode:
  228. return atomicwrites.atomic_write(name, mode=mode, overwrite=True, **kwargs)
  229. else:
  230. return open(name, mode, **kwargs)
  231. def isascii(s):
  232. """ str.isascii only exists in python3.7+ """
  233. try:
  234. return s.isascii()
  235. except AttributeError:
  236. try:
  237. s.encode('ascii')
  238. return True
  239. except (UnicodeDecodeError, UnicodeEncodeError):
  240. return False
  241. class fzset(frozenset):
  242. def __repr__(self):
  243. return '{%s}' % ', '.join(map(repr, self))
  244. def classify_bool(seq, pred):
  245. true_elems = []
  246. false_elems = []
  247. for elem in seq:
  248. if pred(elem):
  249. true_elems.append(elem)
  250. else:
  251. false_elems.append(elem)
  252. return true_elems, false_elems
  253. def bfs(initial, expand):
  254. open_q = deque(list(initial))
  255. visited = set(open_q)
  256. while open_q:
  257. node = open_q.popleft()
  258. yield node
  259. for next_node in expand(node):
  260. if next_node not in visited:
  261. visited.add(next_node)
  262. open_q.append(next_node)
  263. def bfs_all_unique(initial, expand):
  264. "bfs, but doesn't keep track of visited (aka seen), because there can be no repetitions"
  265. open_q = deque(list(initial))
  266. while open_q:
  267. node = open_q.popleft()
  268. yield node
  269. open_q += expand(node)
  270. def _serialize(value, memo):
  271. if isinstance(value, Serialize):
  272. return value.serialize(memo)
  273. elif isinstance(value, list):
  274. return [_serialize(elem, memo) for elem in value]
  275. elif isinstance(value, frozenset):
  276. return list(value) # TODO reversible?
  277. elif isinstance(value, dict):
  278. return {key:_serialize(elem, memo) for key, elem in value.items()}
  279. # assert value is None or isinstance(value, (int, float, str, tuple)), value
  280. return value