diff options
Diffstat (limited to 'tools/leak_finder.py')
-rw-r--r-- | tools/leak_finder.py | 196 |
1 files changed, 196 insertions, 0 deletions
#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of Qt for Python.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################

"""
leak_finder.py
==============

This script finds memory leaks in Python.

Usage:
------

Place one or more lines which should be tested for leaks in a loop:

    from leak_finder import LeakFinder
    ...
    lf = LeakFinder()
    for i in range(1000):
        leaking_statement()
    lf.find_leak()


Theory
------

How to find a leak?

We repeatedly perform an action and observe if that has an unexpected
side effect. There are typically two observations:

* one object is growing its refcount (a pseudo-leak)
* we get many new objects of one type (a true leak)

A difficulty in trying to get leak info is avoiding side effects
of the measurement. Early attempts with lists of refcounts were
unsuccessful. Using array.array for counting refcounts avoids that.


Algorithm
---------
We record a snapshot of all objects in a list and a parallel array
of refcounts.

Then we do some computation and do the same snapshot again.

The structure of a list of all objects is extending at the front for
some reason. That makes the captured structures easy to compare.
We reverse that list and array and have for the objects:

    len(all2) >= len(all1)

    all1[idx] == all2[idx] for idx in range(len(all1))

When taking the second snapshot, the objects still have references from
the first snapshot.
For objects with no effect, the following relation is true:

    refs1[idx] == refs2[idx] - 1 for idx in range(len(all1))

All other objects are potential pseudo-leaks, because they waste
references but no objects in the first place.

Then we look at the newly created objects:
These objects are real leaks if their number is growing with the probe
size. For analysis, the number of new objects per type is counted.
"""

import array
import collections
import gc
import sys
import unittest

# This comes from Python, too.  Nothing in this module uses it, and the
# ``test`` package can be missing from trimmed-down Python installations,
# so a failing import must not make the whole tool unusable.
try:
    from test import support
except ImportError:
    support = None

# ``sys.getobjects`` is the hook used for taking snapshots; probe for it
# once so that the demo test can be skipped when it does not exist.
try:
    sys.getobjects
    have_debug = True
except AttributeError:
    have_debug = False


class LeakFinder:
    """Report suspicious refcount changes and new objects between snapshots.

    A first snapshot is taken when the instance is created; calling
    :meth:`find_leak` takes a second one, compares both and prints the
    findings.

    Attributes (they exist only until :meth:`find_leak` is called):
        all: the objects of the first snapshot, oldest entries first.
        refs: ``array.array("l")`` with the refcount of each entry of
            ``all``, in the same order.
    """

    def __init__(self):
        self.all, self.refs = self._make_snapshot()

    @staticmethod
    def _make_snapshot():
        """Return ``(objects, refs)`` describing the current objects.

        ``objects`` is the (reversed) list returned by ``sys.getobjects(0)``
        and ``refs`` is a parallel ``array.array("l")`` holding the result
        of ``sys.getrefcount`` for every entry.

        Raises:
            AttributeError: if the interpreter has no ``sys.getobjects``.
        """
        getobjects = getattr(sys, "getobjects", None)
        if getobjects is None:
            # Same exception type as the plain attribute access would
            # raise, but with a message that tells the user what to do.
            raise AttributeError(
                "sys.getobjects is not available. "
                'You need a debug build with "--with-trace-refs"')
        gc.collect()
        # get all objects
        objects = getobjects(0)
        # Get an array with the refcounts.  An array (instead of a list of
        # int objects) keeps the measurement from disturbing itself; see
        # the "Theory" section of the module docstring.
        getrefcount = sys.getrefcount
        refs = array.array("l", (getrefcount(obj) for obj in objects))
        # The lists have the same ending.  Reversing both moves the common
        # part to the front, which makes the comparison easier.
        objects.reverse()
        refs.reverse()
        return objects, refs

    @staticmethod
    def _short_repr(x, limit=76):
        """Return ``repr(x)``, cut to ``limit`` characters plus ``"..."``
        when it is longer than ``limit``."""
        s = repr(x)
        if len(s) > limit:
            s = s[:limit] + "..."
        return s

    def find_leak(self, *, threshold=10):
        """Take a second snapshot, compare it with the first and print
        the findings.

        Two kinds of findings are printed:

        * ``Fake Leak``: an object that is present in both snapshots and
          whose refcount changed by more than ``threshold`` (after
          discounting the one reference held by the first snapshot).
        * ``True Leak``: a type of which more than ``threshold`` new
          objects appeared after the first snapshot.

        The first snapshot is consumed: ``self.all`` and ``self.refs`` are
        deleted, so this method can be called only once per instance.

        Args:
            threshold: changes up to and including this size are treated
                as noise and are not reported.  Defaults to 10.
        """
        all1 = self.all
        refs1 = self.refs
        del self.all, self.refs
        # ``all1`` is still alive while the second snapshot is taken, which
        # is why every unchanged object shows exactly one extra reference.
        all2, refs2 = self._make_snapshot()
        common = len(all1)
        del all1

        srepr = self._short_repr
        # look into existing objects for increased refcounts
        first = True
        for idx in range(common):
            ref = refs2[idx] - refs1[idx] - 1
            if abs(ref) <= threshold:
                continue
            obj = all2[idx]
            if first:
                # separate the report from previous output with a blank line
                print()
                first = False
            print(f"Fake Leak ref={ref} obj={srepr(obj)}")

        # look at the extra objects, grouped by type
        types = collections.defaultdict(list)
        for obj in all2[common:]:
            types[type(obj)].append(obj)

        first = True
        for typ, oblis in types.items():
            count = len(oblis)
            if count <= threshold:
                continue
            # Sorting is best effort only: objects that cannot be ordered
            # are shown in whatever order the list is left in.
            try:
                oblis.sort()
            except TypeError:
                pass
            if first:
                print()
                first = False
            left, mid, right = oblis[0], oblis[count // 2], oblis[-1]
            print(f"True Leak ref={count} typ={typ} left={left} mid={mid} right={right}")


class TestDemo(unittest.TestCase):
    """Demonstrate LeakFinder on one artificial pseudo leak and one true leak."""

    @unittest.skipUnless(have_debug, 'You need a debug build with "--with-trace-refs"')
    def test_demo(self):
        # create a pseudo leak and a true leak
        fake_leak_obj = []
        true_leak_obj = []
        lf = LeakFinder()
        refs_before = sys.gettotalrefcount()
        for idx in range(100):
            # the same string object again and again: only its refcount grows
            fake_leak_obj.append("same string")
            true_leak_obj.append(idx + 1000)    # avoiding cached low numbers
        refs_after = sys.gettotalrefcount()
        lf.find_leak()
        # the loop must have changed the total refcount by more than 10
        self.assertNotAlmostEqual(refs_after - refs_before, 0, delta=10)


if __name__ == "__main__":
    unittest.main()