This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 rivejä
3.2 KiB

  1. import re
  2. import sre_parse
  3. class GrammarError(Exception):
  4. pass
  5. class ParseError(Exception):
  6. pass
  7. class UnexpectedToken(ParseError):
  8. def __init__(self, token, expected, seq, index):
  9. self.token = token
  10. self.expected = expected
  11. self.line = getattr(token, 'line', '?')
  12. self.column = getattr(token, 'column', '?')
  13. try:
  14. context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
  15. except AttributeError:
  16. context = seq[index:index+5]
  17. except TypeError:
  18. context = "<no context>"
  19. message = ("Unexpected token %r at line %s, column %s.\n"
  20. "Expected: %s\n"
  21. "Context: %s" % (token, self.line, self.column, expected, context))
  22. super(UnexpectedToken, self).__init__(message)
  23. def is_terminal(sym):
  24. return isinstance(sym, Terminal) or sym.isupper() or sym[0] == '$'
  25. class LexerConf:
  26. def __init__(self, tokens, ignore=(), postlex=None):
  27. self.tokens = tokens
  28. self.ignore = ignore
  29. self.postlex = postlex
  30. class ParserConf:
  31. def __init__(self, rules, callback, start):
  32. assert all(len(r) == 4 for r in rules)
  33. self.rules = rules
  34. self.callback = callback
  35. self.start = start
  36. class Pattern(object):
  37. def __init__(self, value, flags=None):
  38. self.value = value
  39. self.flags = flags
  40. def __repr__(self):
  41. return repr(self._get_flags() + self.value)
  42. # Pattern Hashing assumes all subclasses have a different priority!
  43. def __hash__(self):
  44. return hash((type(self), self.value))
  45. def __eq__(self, other):
  46. return type(self) == type(other) and self.value == other.value
  47. def _get_flags(self):
  48. if self.flags:
  49. assert len(self.flags) == 1
  50. return '(?%s)' % self.flags
  51. return ''
  52. class PatternStr(Pattern):
  53. def to_regexp(self):
  54. return self._get_flags() + re.escape(self.value)
  55. @property
  56. def min_width(self):
  57. return len(self.value)
  58. max_width = min_width
  59. class PatternRE(Pattern):
  60. def to_regexp(self):
  61. return self._get_flags() + self.value
  62. @property
  63. def min_width(self):
  64. return sre_parse.parse(self.to_regexp()).getwidth()[0]
  65. @property
  66. def max_width(self):
  67. return sre_parse.parse(self.to_regexp()).getwidth()[1]
  68. class TokenDef(object):
  69. def __init__(self, name, pattern):
  70. assert isinstance(pattern, Pattern), pattern
  71. self.name = name
  72. self.pattern = pattern
  73. def __repr__(self):
  74. return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
  75. class Terminal:
  76. def __init__(self, data):
  77. self.data = data
  78. def __repr__(self):
  79. return '%r' % self.data
  80. def __eq__(self, other):
  81. return isinstance(other, type(self)) and self.data == other.data
  82. def __hash__(self):
  83. return hash(self.data)
  84. class Terminal_Regexp(Terminal):
  85. def __init__(self, name, regexp):
  86. Terminal.__init__(self, regexp)
  87. self.name = name
  88. self.match = re.compile(regexp).match
  89. class Terminal_Token(Terminal):
  90. def match(self, other):
  91. return self.data == other.type