This repository contains the code used to mirror other repositories, as well as the code being mirrored.
## Lexer Implementation

import re

from utils import Str


class LexError(Exception):
    pass


class Token(Str):
    """A matched token: a string subclass that also carries its type name
    and its position in the input stream."""
    def __new__(cls, type, value, pos_in_stream=None):
        inst = Str.__new__(cls, value)
        inst.type = type
        inst.pos_in_stream = pos_in_stream
        inst.value = value
        return inst

# class Token(object):
#     def __init__(self, type, value, lexpos):
#         self.type = type
#         self.value = value
#         self.lexpos = lexpos

    def __repr__(self):
        return 'Token(%s, %s, %s)' % (self.type, self.value, self.pos_in_stream)


class Regex:
    def __init__(self, pattern, flags=()):
        self.pattern = pattern
        self.flags = flags


LIMIT = 50  # Stupid named groups limit in python re: compile patterns in chunks


class Lexer(object):
    def __init__(self, tokens, callbacks, ignore=()):
        self.ignore = ignore

        # Sanitization: every pattern must compile on its own, and every
        # ignored name must refer to a defined token
        token_names = {t[0] for t in tokens}
        for t in tokens:
            try:
                re.compile(t[1])
            except re.error:
                raise LexError("Cannot compile token: %s: %s" % t)
        assert all(t in token_names for t in ignore)

        # Init
        self.tokens = tokens
        self.callbacks = callbacks

        # Longer pattern strings first, so more specific tokens win ties
        self.tokens.sort(key=lambda x: len(x[1]), reverse=True)

        # Combine the token patterns into alternations of named groups,
        # chunked by LIMIT to stay under re's group limit. Note: the token
        # patterns themselves must not contain capturing groups, or
        # m.lastindex in lex() will point at an inner group instead.
        self.mres = []
        self.name_from_index = []
        x = self.tokens
        while x:
            mre = re.compile(u'|'.join(u'(?P<%s>%s)' % t for t in x[:LIMIT]))
            self.mres.append(mre)
            self.name_from_index.append({i: n for n, i in mre.groupindex.items()})
            x = x[LIMIT:]

    def lex(self, stream):
        lex_pos = 0
        while True:
            # Try each chunked pattern in turn at the current position
            for i, mre in enumerate(self.mres):
                m = mre.match(stream, lex_pos)
                if m:
                    value = m.group(0)
                    type_ = self.name_from_index[i][m.lastindex]
                    t = Token(type_, value, lex_pos)
                    if t.type in self.callbacks:
                        self.callbacks[t.type](t)
                    if t.type not in self.ignore:
                        yield t
                    lex_pos += len(value)
                    break
            else:
                # No pattern matched: either we reached the end of the
                # stream, or the input contains an unlexable character
                if lex_pos < len(stream):
                    context = stream[lex_pos:lex_pos+5]
                    raise LexError("No token defined for: '%s' in %s" % (stream[lex_pos], context))
                break
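As a usage sketch (not part of the file above): tokens are (name, pattern) pairs, callbacks maps token names to functions invoked on each matched Token, and ignore names tokens that are matched but not yielded. The token names, patterns, callback, and input below are hypothetical examples, and this assumes utils.Str behaves like a plain str subclass.

    # Hypothetical example; token definitions are illustrative only
    tokens = [
        ('NUMBER', r'\d+'),
        ('NAME',   r'[a-zA-Z_]\w*'),
        ('PLUS',   r'\+'),
        ('WS',     r'[ \t]+'),
    ]

    def on_number(tok):
        # Callbacks receive the Token itself; here we just log its position
        print('saw number at %s' % tok.pos_in_stream)

    lexer = Lexer(tokens, callbacks={'NUMBER': on_number}, ignore=('WS',))

    for tok in lexer.lex('x + 42'):
        print(tok.type, tok.value)   # NAME x, PLUS +, NUMBER 42 (WS is skipped)

Because Token subclasses a string type, each yielded token compares and prints like its matched text, while still exposing .type and .pos_in_stream for the parser.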