bencode.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. #!/usr/bin/env python
  2. '''
  3. nrepl.bencode
  4. -------------
  5. This module provides BEncode-protocol support.
  6. :copyright: (c) 2013 by Chas Emerick.
  7. :license: MIT, see LICENSE for more details.
  8. '''
  9. try:
  10. from cStringIO import StringIO
  11. except ImportError:
  12. from io import StringIO
  13. import sys, os
  14. # Some code so we can use different features without worrying about versions.
  15. PY2 = sys.version_info[0] == 2
  16. if not PY2:
  17. text_type = str
  18. string_types = (str, bytes)
  19. unichr = chr
  20. else:
  21. text_type = unicode
  22. string_types = (str, unicode)
  23. unichr = unichr
  24. def _read_byte(s):
  25. return s.read(1)
  26. def _read_int(s, terminator=None, init_data=None):
  27. int_chrs = init_data or []
  28. while True:
  29. c = _read_byte(s)
  30. if not c.isdigit() or c == terminator or not c:
  31. break
  32. else:
  33. int_chrs.append(c)
  34. return int(''.join(int_chrs))
  35. def _read_bytes(s, n):
  36. data = StringIO()
  37. cnt = 0
  38. while cnt < n:
  39. m = s.read(n - cnt)
  40. if not m:
  41. raise Exception("Invalid bytestring, unexpected end of input.")
  42. data.write(m)
  43. cnt += len(m)
  44. data.flush()
  45. # Taking into account that Python3 can't decode strings
  46. try:
  47. ret = data.getvalue().decode("UTF-8")
  48. except AttributeError:
  49. ret = data.getvalue()
  50. return ret
  51. def _read_delimiter(s):
  52. d = _read_byte(s)
  53. if d.isdigit():
  54. d = _read_int(s, ":", [d])
  55. return d
  56. def _read_list(s):
  57. data = []
  58. while True:
  59. datum = _read_datum(s)
  60. if not datum:
  61. break
  62. data.append(datum)
  63. return data
  64. def _read_map(s):
  65. i = iter(_read_list(s))
  66. return dict(zip(i, i))
  67. _read_fns = {"i": _read_int,
  68. "l": _read_list,
  69. "d": _read_map,
  70. "e": lambda _: None,
  71. # EOF
  72. None: lambda _: None}
  73. def _read_datum(s):
  74. delim = _read_delimiter(s)
  75. if delim:
  76. return _read_fns.get(delim, lambda s: _read_bytes(s, delim))(s)
  77. def encode_bytes(x):
  78. return (str(len(x)).encode('utf-8'), b':', x)
  79. def encode_string(x):
  80. try:
  81. s = x.encode('utf-8')
  82. except UnicodeDecodeError:
  83. return encode_bytes(x)
  84. return (str(len(s)).encode('utf-8'), b':', s)
  85. def _write_datum(x, out):
  86. if isinstance(x, string_types):
  87. #x = x.encode("utf-8")
  88. # TODO revisit encodings, this is surely not right. Python
  89. # (2.x, anyway) conflates bytes and strings, but 3.x does not...
  90. l = len(x.replace("\n", "nn"))
  91. if os.name is "posix":
  92. l = len(x)
  93. out.write(str(l))
  94. out.write(":")
  95. out.write(x)
  96. elif isinstance(x, int):
  97. out.write("i")
  98. out.write(str(x))
  99. out.write("e")
  100. elif isinstance(x, (list, tuple)):
  101. out.write("l")
  102. for v in x:
  103. _write_datum(v, out)
  104. out.write("e")
  105. elif isinstance(x, dict):
  106. out.write("d")
  107. for k, v in x.items():
  108. _write_datum(k, out)
  109. _write_datum(v, out)
  110. out.write("e")
  111. out.flush()
  112. def encode(v):
  113. "bencodes the given value, may be a string, integer, list, or dict."
  114. s = StringIO()
  115. _write_datum(v, s)
  116. return s.getvalue()
  117. def decode_file(file):
  118. while True:
  119. x = _read_datum(file)
  120. if not x:
  121. break
  122. yield x
  123. def decode(string):
  124. "Generator that yields decoded values from the input string."
  125. return decode_file(StringIO(string))
  126. class BencodeIO(object):
  127. def __init__(self, file, on_close=None):
  128. self._file = file
  129. self._on_close = on_close
  130. def read(self):
  131. return _read_datum(self._file)
  132. def __iter__(self):
  133. return self
  134. def next(self):
  135. v = self.read()
  136. if not v:
  137. raise StopIteration
  138. return v
  139. def __next__(self):
  140. # In Python3, __next__ it is an own special class.
  141. v = self.read()
  142. if not v:
  143. raise StopIteration
  144. return v
  145. def write(self, v):
  146. return _write_datum(v, self._file)
  147. def flush(self):
  148. if self._file.flush:
  149. self._file.flush()
  150. def close(self):
  151. # Run the on_close handler if one exists, which can do something
  152. # useful like cleanly close a socket. (Note that .close() on a
  153. # socket.makefile('rw') does some kind of unclean close.)
  154. if self._on_close is not None:
  155. self._on_close()
  156. else:
  157. self._file.close()