This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
2.9 KiB

  1. import re
  2. import sre_parse
  3. import sys
  # True when the running interpreter is Python 3.6 or newer.
  Py36 = sys.version_info >= (3, 6)
  5. ###{standalone
  def is_terminal(sym):
      """Return the result of ``sym.isupper()``.

      A symbol is treated as a terminal exactly when its name is upper-case
      according to ``str.isupper``.
      """
      upper_case = sym.isupper()
      return upper_case
  class GrammarError(Exception):
      """Exception subclass for grammar-related failures; adds no behaviour."""
  class ParseError(Exception):
      """Exception subclass for parse failures; adds no behaviour of its own."""
  class UnexpectedToken(ParseError):
      """ParseError describing a token that was not among the expected ones.

      Attributes set on the instance:
          token:    the offending token, stored as given.
          expected: the expected value(s), stored as given.
          line:     ``token.line`` if present, otherwise ``'?'``.
          column:   ``token.column`` if present, otherwise ``'?'``.

      The exception message also shows up to five items of ``seq`` starting
      at ``index`` as context.
      """

      def __init__(self, token, expected, seq, index):
          self.line = getattr(token, 'line', '?')
          self.column = getattr(token, 'column', '?')
          self.token = token
          self.expected = expected

          try:
              # Slice and format inside the ``try`` so that an unsliceable
              # ``seq`` or a non-numeric ``index`` lands in the TypeError arm.
              rendered = []
              for item in seq[index:index+5]:
                  rendered.append('%r(%s)' % (item.value, item.type))
              context = ' '.join(rendered)
          except AttributeError:
              # Items without ``value``/``type``: show the raw slice instead.
              context = seq[index:index+5]
          except TypeError:
              context = "<no context>"

          template = ("Unexpected token %r at line %s, column %s.\n"
                      "Expected: %s\n"
                      "Context: %s")
          message = template % (token, self.line, self.column, expected, context)

          super(UnexpectedToken, self).__init__(message)
  28. ###}
  class LexerConf:
      """Plain holder for lexer settings.

      The constructor arguments are stored unchanged as the ``tokens``,
      ``ignore`` (default ``()``) and ``postlex`` (default ``None``)
      attributes.
      """

      def __init__(self, tokens, ignore=(), postlex=None):
          self.tokens, self.ignore, self.postlex = tokens, ignore, postlex
  class ParserConf:
      """Plain holder for parser settings.

      The constructor arguments are stored unchanged as the ``rules``,
      ``callback`` and ``start`` attributes.
      """

      def __init__(self, rules, callback, start):
          self.rules, self.callback, self.start = rules, callback, start
  class Pattern(object):
      """Base pattern: a ``value`` together with a frozen set of ``flags``.

      ``__repr__`` calls ``self.to_regexp()``, which this class does not
      define itself.
      """

      def __init__(self, value, flags=()):
          self.flags = frozenset(flags)
          self.value = value

      def __repr__(self):
          regexp = self.to_regexp()
          return repr(regexp)

      # Pattern Hashing assumes all subclasses have a different priority!
      def __hash__(self):
          key = (type(self), self.value, self.flags)
          return hash(key)

      def __eq__(self, other):
          # Equality requires the exact same class, matching the hash key above.
          if type(self) != type(other):
              return False
          return self.value == other.value and self.flags == other.flags

      # Python 3.6 changed the syntax for flags in regular expressions, so the
      # implementation of ``_get_flags`` is picked once, at class creation.
      if Py36:
          def _get_flags(self, value):
              """Wrap ``value`` in one ``(?flag:...)`` group per flag."""
              wrapped = value
              for flag in self.flags:
                  wrapped = '(?%s:%s)' % (flag, wrapped)
              return wrapped
      else:
          def _get_flags(self, value):
              """Prefix ``value`` with one ``(?flag)`` marker per flag."""
              prefixed = value
              for flag in self.flags:
                  prefixed = ('(?%s)' % flag) + prefixed
              return prefixed
  class PatternStr(Pattern):
      """Pattern whose ``value`` is escaped with ``re.escape`` before use."""

      def to_regexp(self):
          escaped = re.escape(self.value)
          return self._get_flags(escaped)

      @property
      def min_width(self):
          """Length of ``value``."""
          width = len(self.value)
          return width

      # Both bounds are served by the same property object.
      max_width = min_width
  class PatternRE(Pattern):
      """Pattern whose ``value`` is used as a regular expression as-is."""

      def to_regexp(self):
          raw = self.value
          return self._get_flags(raw)

      @property
      def min_width(self):
          """First element of the width pair ``sre_parse`` reports."""
          parsed = sre_parse.parse(self.to_regexp())
          bounds = parsed.getwidth()
          return bounds[0]

      @property
      def max_width(self):
          """Second element of the width pair ``sre_parse`` reports."""
          parsed = sre_parse.parse(self.to_regexp())
          bounds = parsed.getwidth()
          return bounds[1]
  class TokenDef(object):
      """Binds a ``name`` to a ``Pattern`` with a ``priority`` (default 1).

      The constructor asserts that ``pattern`` is a ``Pattern`` instance.
      """

      def __init__(self, name, pattern, priority=1):
          assert isinstance(pattern, Pattern), pattern
          self.name, self.pattern, self.priority = name, pattern, priority

      def __repr__(self):
          cls_name = type(self).__name__
          return '%s(%r, %r)' % (cls_name, self.name, self.pattern)