# tcstats.py

"""
tool to parse/inspect tcmalloc internals, and detect 'lost' memory,
meaning memory which isn't reachable via any (internal) pointers.
http://goog-perftools.sourceforge.net/doc/tcmalloc.html
assumes a 64-bit system, and a recent version of GDB to avoid memleaks
in this tool itself; still, it may use a _lot_ of memory.
"""
import bisect
import collections
import gc
import gdb
import struct
# tcmalloc
pageheap_ = gdb.parse_and_eval('tcmalloc::Static::pageheap_')
central_cache_ = gdb.parse_and_eval('tcmalloc::Static::central_cache_')
thread_heaps_ = gdb.parse_and_eval('tcmalloc::ThreadCache::thread_heaps_')
sizemap_ = gdb.parse_and_eval('tcmalloc::Static::sizemap_')  # XXX cache
spantype = gdb.lookup_type('tcmalloc::Span').pointer()
knumclasses = int(gdb.parse_and_eval('kNumClasses'))  # XXX skip 0?
kmaxpages = int(gdb.parse_and_eval('kMaxPages'))
pagesize = 1 << int(gdb.parse_and_eval('kPageShift'))
inferior = gdb.selected_inferior()

LOCATION = {
    0: 'IN_USE',
    1: 'NORMAL',
    2: 'RETURNED',
}
# tcmalloc datastructures
class Span(object):
    """ multiple pages combined into a single 'span' """
    def __init__(self, val):
        self.val = val
        self.start = int(val['start']) * pagesize
        self.length = int(val['length']) * pagesize
        self.next_ = val['next']
        self.location = LOCATION[int(val['location'])]
        self.sizeclass = int(val['sizeclass'])
        self.size = int(sizemap_['class_to_size_'][self.sizeclass])
        self.objects_ = val['objects']

    @property
    def objects(self):
        for object_ in ObjectList(self.objects_):
            yield object_

    @property
    def ll_next(self):
        return Span(self.val['next'])

    def __repr__(self):
        return 'Span(%x-%x)' % (self.start, self.start + self.length)
class SpanList(object):
    """ list of spans """
    def __init__(self, val):
        self.val = val

    def __iter__(self):
        # the lists are circular; stop when the next pointer wraps back to the head
        span = Span(self.val)
        while span:
            if span.next_ == self.val.address:
                break
            span = span.ll_next
            yield span
class Object(object):
    """ 'free' object """
    def __init__(self, addr):
        self.addr = addr
        try:
            self.next_ = int(str(gdb.parse_and_eval('*(void **)' + hex(addr))), 16)
        except gdb.MemoryError:  # XXX span type NORMAL, check elsewhere?
            print('MEMORY ERROR!!')
            self.next_ = 0

    @property
    def ll_next(self):
        if self.next_ != 0:
            return Object(self.next_)

    def __repr__(self):
        return 'Object(%x)' % self.addr
class ObjectList(object):
    """ linked-list of 'free' objects """
    def __init__(self, val):
        self.val = val

    def __iter__(self):
        addr = int(str(self.val), 16)
        if addr != 0:
            object_ = Object(addr)
            while object_:
                yield object_
                object_ = object_.ll_next
# non-tcmalloc
class Block(object):
    """ unit of memory allocated by program """
    __slots__ = ['address', 'size', 'used']

    def __init__(self, address, size, used):
        self.address = address
        self.size = size
        self.used = used

    def __repr__(self):
        return 'Block(%x-%x)' % (self.address, self.address + self.size)
# tc-malloc tools
def dump_pageheap():
    """ summarize page cache """
    print()
    print('PAGE HEAP')
    for kind in ('normal', 'returned'):
        print('(%s)' % kind)
        print('pageheap_.large_:')
        val = pageheap_['large_'][kind]
        for span in SpanList(val):
            print(span, span.length)
        print('pageheap_.free_:')
        for x in range(kmaxpages):
            val = pageheap_['free_'][x][kind]
            for span in SpanList(val):
                print('[%d/%d]' % (x, kmaxpages), span, span.length, span.location)
    print()
    print('PAGE MAP RADIX')
def pagemap_spans():
    """ parse 3-level radix tree containing 'spans', covering all memory used by tcmalloc """
    prev_end = None
    root = pageheap_['pagemap_']['root_']
    for a in range(2**12):
        node = root['ptrs'][a]
        if int(str(node), 16) != 0:
            for b in range(2**12):
                node2 = node['ptrs'][b]
                if int(str(node2), 16) != 0:
                    for c in range(2**11):
                        node3 = node2['ptrs'][c]
                        if int(str(node3), 16) != 0:
                            pagestart = ((a << 23) + (b << 11) + c) * pagesize
                            if prev_end is None or pagestart >= prev_end:
                                span = Span(node3.cast(spantype))
                                prev_end = pagestart + span.length
                                yield span
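
# Illustration only (not used by the tool, helper name made up): the inverse of
# the page-id arithmetic in pagemap_spans() above, assuming the same
# 12/12/11-bit split of the page id across the three radix levels.
def _radix_indices(address):
    """ map an address to its (a, b, c) indices in the 3-level pagemap """
    pageid = address // pagesize
    a = (pageid >> 23) & (2**12 - 1)
    b = (pageid >> 11) & (2**12 - 1)
    c = pageid & (2**11 - 1)
    return a, b, c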
def dump_pagemap():
    """ summarizes above page map """
    size = 0
    for span in pagemap_spans():
        size += span.length
        print(hex(span.start), span.length, span.size, span.location,
              '(%d objects)' % len(list(span.objects)), size)
def dump_central_cache():
    """ summarizes central free cache """
    print()
    print('CENTRAL CACHE')
    for kind in ('empty_', 'nonempty_'):
        print('(%s)' % kind)
        for x in range(knumclasses):
            val = central_cache_[x][kind]
            for span in SpanList(val):
                print('[%d(%d/%d)]' % (span.size, x, knumclasses), span, span.length,
                      span.location, '(%d objects)' % len(list(span.objects)))
def dump_thread_caches():
    """ summarizes thread free caches """
    print()
    print('THREAD CACHES')
    print('thread heaps:', gdb.parse_and_eval("'tcmalloc::ThreadCache::thread_heap_count_'"))
    heap = thread_heaps_
    while heap:
        if int(str(heap), 16) == 0:
            break
        print()
        print('[thread %s]' % hex(int(heap['tid_'])))
        for x in range(knumclasses):
            val = heap['list_'][x]
            objects = list(ObjectList(val['list_']))
            if objects:
                size = int(sizemap_['class_to_size_'][x])
                print('[%d(%d/%d)] (%d objects)' % (size, x, knumclasses, len(objects)))
        print()
        heap = heap['next_']
def thread_cache_objaddrs():
    """ addresses of free objects in thread cache, used in pagemap_blocks """
    result = set()
    heap = thread_heaps_
    count = 0
    while heap:
        if int(str(heap), 16) == 0:
            break
        for x in range(knumclasses):
            val = heap['list_'][x]
            objects = list(ObjectList(val['list_']))
            for object_ in objects:
                result.add(object_.addr)
                count += 1
        heap = heap['next_']
    return result
def pagemap_blocks():
    """ determines memory 'blocks', allocated by the program """
    thread_objs = thread_cache_objaddrs()  # free objects are in central cache _or_ thread cache
    for span in pagemap_spans():
        if span.location == 'IN_USE':
            if span.sizeclass != 0:
                chunks = span.length // span.size
                starts = set(span.start + x * span.size for x in range(chunks))
                for obj in span.objects:
                    starts.remove(obj.addr)
                starts -= thread_objs
                for start in starts:
                    yield Block(start, span.size, True)
                for obj in span.objects:
                    yield Block(obj.addr, span.size, False)
            else:
                yield Block(span.start, span.length, True)
        elif span.location == 'NORMAL':
            yield Block(span.start, span.length, False)

# non-tc-malloc tools
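# The reachability pass below is a conservative scan: every 8-byte word found
# on the thread stacks and in the executable's data segments (at any byte
# offset) is treated as a potential pointer into a block, and blocks reached
# that way, directly or transitively via other blocks, are marked; used blocks
# never reached are reported as 'lost'.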
def data_stacks():
    """ yield all stack data, as it may contain pointers """
    for thread in inferior.threads():
        thread.switch()
        frame = gdb.newest_frame()
        frame.select()
        stack_pointer = int(str(gdb.parse_and_eval('$sp')), 16)
        while frame:
            wa = str(frame)  # XXX
            base_pointer = int(wa[wa.find('=') + 1:wa.find(',')], 16)
            frame = frame.older()
        data = bytes(inferior.read_memory(stack_pointer, base_pointer - stack_pointer))
        yield data
def data_segments():  # XXX ugly, overkill?
    """ yield all global segments, as they may contain pointers """
    part = None
    s = gdb.execute('info files', to_string=True)
    for line in s.splitlines():
        line = line.strip()
        if line.startswith('Local core'):
            part = 'core'
        elif line.startswith('Local exec'):
            part = 'exec'
        elif part == 'exec' and line.startswith('0x'):
            parts = line.split()
            if True:  # parts[4] in ('.data', '.bss') and not 'tcmalloc' in parts[-1]:
                start = int(parts[0], 16)
                size = int(parts[2], 16) - start
                data = bytes(inferior.read_memory(start, size))
                yield data
def bisect_block(address, starts, start_block):
    """ return block to which address belongs or None """
    b = bisect.bisect_right(starts, address)
    if b > 0:
        block = start_block[starts[b - 1]]
        if block.address <= address < block.address + block.size:
            return block
def search_reachable(reachable, starts, start_block):
    """ return addresses of blocks reachable from initial set of addresses """
    front = reachable.copy()
    vopo = gdb.lookup_type('void').pointer()
    scanned = 0
    while front:
        new = set()
        for addr in front:
            block = start_block[addr]
            data = inferior.read_memory(block.address, block.size)
            for x in range(block.size - 8):
                ptr = struct.unpack_from('Q', data, x)[0]
                block2 = bisect_block(ptr, starts, start_block)
                if block2 and block2.address not in reachable:
                    new.add(block2.address)
                    reachable.add(block2.address)
            scanned += block.size
            # print('scanned', scanned)
        front = new
    return reachable
def reachable_blocks(blocks):
    """ follow pointer chain from stack and global data to determine reachable blocks """
    starts = sorted(b.address for b in blocks)
    start_block = {b.address: b for b in blocks}
    data = []
    data.extend(data_stacks())
    data.extend(data_segments())
    reachable = set()
    for d in data:
        for x in range(len(d) - 8):
            addr = struct.unpack_from('Q', d, x)[0]
            block = bisect_block(addr, starts, start_block)
            if block:
                reachable.add(block.address)
    for addr in search_reachable(reachable, starts, start_block):
        yield start_block[addr]
def used_blocks(blocks):
    for block in blocks:
        if block.used:
            yield block

def free_blocks(blocks):
    for block in blocks:
        if not block.used:
            yield block

def freq_blocks(blocks):
    summary = collections.defaultdict(int)
    count = 0
    size = 0
    for block in blocks:
        count += 1
        size += block.size
        summary[block.size] += 1
    return summary, count, size
def dump_blocks(blocks):
    """ summarizes given blocks """
    print()
    print('BLOCK SUMMARY')
    summary, count, size = freq_blocks(blocks)
    print('%d blocks, %d total size' % (count, size))
    print('size frequencies:')
    for a, b in summary.items():
        print(a, b)
def unreachable_blocks(used_blocks):
    """ determine unreachable blocks """
    start_block = {b.address: b for b in used_blocks}
    reachable = set(b.address for b in reachable_blocks(used_blocks))
    return [start_block[addr] for addr in (set(start_block) - reachable)]

def subtract_blocks(ax, bx):
    astart_block = {a.address: a for a in ax}
    bstart_block = {b.address: b for b in bx}
    return [astart_block[addr] for addr in (set(astart_block) - set(bstart_block))]
def main():
    # tcmalloc
    # dump_pageheap()
    # dump_pagemap()
    # dump_central_cache()
    # dump_thread_caches()
    blocks = list(pagemap_blocks())
    used = list(used_blocks(blocks))
    free = list(free_blocks(blocks))
    print()
    print('USED:')
    dump_blocks(used)
    print()
    print('FREE:')
    dump_blocks(free)
    # reachability
    print()
    print('LOST:')
    unreached = unreachable_blocks(used)
    dump_blocks(unreached)

if __name__ == '__main__':
    main()