PySide: write a renamer script for debugging

Debug output is hard to read if there are many objects with the same type but different address. This script builds simple names instead of addresses which are easy to track. See the info at the beginning of the script. Change-Id: I51e08276d8ffc6d7365ce8620957e64769d7fd8c Task-number: PYSIDE-79 Task-number: PYSIDE-1470 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io> (cherry picked from commit 8a115fcc3719af509a1abb91805729c06ce38443) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
author: Christian Tismer <tismer@stackless.com> 2021-01-10 18:50:39 +0100
committer: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> 2021-01-11 15:48:36 +0000
commit: 5a90b97f79c3062149bed49ab45b745ef848c749 (patch)
tree: 04612e29897bf2f02618ed04eac29f10f5f8bcbf
parent: 72485050f28e3d9f6f99c2f9d112d451f5066d4e (diff)
2 files changed, 318 insertions, 0 deletions
diff --git a/tools/debug_renamer.py b/tools/debug_renamer.py
new file mode 100644
index 000000000..da5beb127
--- /dev/null
+++ b/tools/debug_renamer.py
@@ -0,0 +1,122 @@
+#############################################################################
+##
+## Copyright (C) 2020 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of Qt for Python.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+"""
+debug_renamer.py
+================
+
+This script renames object addresses in debug protocols to useful names.
+Comparing output will produce minimal deltas.
+
+
+Problem:
+--------
+
+In the debugging output of PYSIDE-79, we want to study different output
+before and after applying some change to the implementation.
+
+We have support from the modified Python interpreter that creates full
+traces of every object creation and increment/decrement of refcounts.
+
+The comparison between "before" and "after" gets complicated because
+the addresses of objects do not compare well.
+
+
+Input format:
+-------------
+The Python output lines are of this format:
+
+mode filename:lineno funcname object_id typename object_refcount
+
+Mode can be "INC", "DEC", "XINC", "XDEC", "NEW", "NEWV".
+
+On "NEW" or "NEWV", an object is created and the refcount is always 1.
+On "DEC" or "XDEC", when refcount is 0, the object is deleted.
+
+
+Operation
+---------
+
+The script reads from <stdin> until EOF. It produces output where the
+object_id and typename fields are replaced by a single unique object name
+that is built from the typename plus a letter or a number.
+
+
+Example
+-------
+
+You can create reference debugging output by using the modified interpreter at
+
+    https://github.com/ctismer/cpython/tree/3.9-refdebug
+
+and pipe the error output through this script.
+This is work in flux that might change quite often.
+
+
+To Do List
+----------
+
+The script should be re-worked to be more flexible, without relying on
+the number of columns but with some intelligent guessing.
+
+Names of objects which are already deleted should be tracked, so that
+they are not re-used by accident.
+"""
+
+import sys
+from collections import OrderedDict
+
+
+def make_name(type_name, name_pos):
+    """
+    Build a name by using uppercase letters and numbers
+    """
+    if name_pos < 26:
+        name = chr(ord("A") + name_pos)
+        return f"{type_name}_{name}"
+    return f"{type_name}_{str(name_pos)}"
+
+
+mode_tokens = "NEW NEWV INC DEC XINC XDEC".split()
+known_types = {}
+
+while 1:
+    line = sys.stdin.readline()
+    if not line:
+        break
+    fields = line.split()
+    if len(fields) != 6 or fields[0] not in mode_tokens:
+        print(line.rstrip())
+        continue
+    mode, fname_lno, funcname, object_id, typename, refcount = fields
+    if typename not in known_types:
+        known_types[typename] = OrderedDict()
+    obj_store = known_types[typename]
+    if object_id not in obj_store:
+        obj_store[object_id] = make_name(typename, len(obj_store))
+    print(f"{mode} {fname_lno} {funcname} {obj_store[object_id]} {refcount}")
diff --git a/tools/leak_finder.py b/tools/leak_finder.py
new file mode 100644
index 000000000..5b5102887
--- /dev/null
+++ b/tools/leak_finder.py
@@ -0,0 +1,196 @@
+#############################################################################
+##
+## Copyright (C) 2020 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of Qt for Python.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+"""
+leak_finder.py
+==============
+
+This script finds memory leaks in Python.
+
+Usage:
+------
+
+Place one or more lines which should be tested for leaks in a loop:
+
+    from leak_finder import LeakFinder
+    ...
+    lf = LeakFinder()
+    for i in range(1000):
+        leaking_statement()
+    lf.find_leak()
+
+
+Theory
+------
+
+How to find a leak?
+
+We repeatedly perform an action and observe if that has an unexpected
+side effect. There are typically two observations:
+
+* one object is growing its refcount (a pseudo-leak)
+* we get many new objects of one type (a true leak)
+
+A difficulty in trying to get leak info is avoiding side effects
+of the measurement. Early attempts with lists of refcounts were
+unsuccessful. Using array.array for counting refcounts avoids that.
+
+
+Algorithm
+---------
+We record a snapshot of all objects in a list and a parallel array
+of refcounts.
+
+Then we do some computation and do the same snapshot again.
+
+The structure of a list of all objects is extending at the front for
+some reason. That makes the captured structures easy to compare.
+We reverse that list and array and have for the objects:
+
+    len(all2) >= len(all1)
+
+    all1[idx] == all2[idx] for idx in range(len(all1))
+
+When taking the second snapshot, the objects still have references from
+the first snapshot.
+For objects with no effect, the following relation is true:
+
+    refs1[idx] == refs2[idx] - 1 for idx in range(len(all1))
+
+All other objects are potential pseudo-leaks, because they waste
+references but do not create new objects in the first place.
+
+Then we look at the newly created objects:
+These objects are real leaks if their number is growing with the probe
+size. For analysis, the number of new objects per type is counted.
+"""
+
+import sys
+import gc
+import array
+import unittest
+
+# this comes from Python, too
+from test import support
+
+try:
+    sys.getobjects
+    have_debug = True
+except AttributeError:
+    have_debug = False
+
+
+class LeakFinder(object):
+    def __init__(self):
+        self.all, self.refs = self._make_snapshot()
+
+    @staticmethod
+    def _make_snapshot():
+        gc.collect()
+        # get all objects
+        all = sys.getobjects(0)
+        # get an array with the refcounts
+        g = sys.getrefcount
+        refs = array.array("l", (g(obj) for obj in all))
+        # the lists have the same ending. Make comparison easier.
+        all.reverse()
+        refs.reverse()
+        return all, refs
+
+    @staticmethod
+    def _short_repr(x, limit=76):
+        s = repr(x)
+        if len(s) > limit:
+            s = s[:limit] + "..."
+        return s
+
+    def find_leak(self):
+        all1 = self.all
+        refs1 = self.refs
+        del self.all, self.refs
+        all2, refs2 = self._make_snapshot()
+        common = len(all1)
+        del all1
+
+        srepr = self._short_repr
+        # look into existing objects for increased refcounts
+        first = True
+        for idx in range(common):
+            ref = refs2[idx] - refs1[idx] - 1
+            if abs(ref) <= 10:
+                continue
+            obj = all2[idx]
+            if first:
+                print()
+            first = False
+            print(f"Fake Leak ref={ref} obj={srepr(obj)}")
+
+        # look at the extra objects by type size
+        types = {}
+        for idx in range(common, len(all2)):
+            obj = all2[idx]
+            typ = type(obj)
+            if typ not in types:
+                types[typ] = []
+            types[typ].append(obj)
+        first = True
+        for typ in types:
+            oblis = types[typ]
+            ref = len(oblis)
+            if ref <= 10:
+                continue
+            try:
+                oblis.sort()
+            except TypeError:
+                pass
+            if first:
+                print()
+            first = False
+            left, mid, right = oblis[0], oblis[ref // 2], oblis[-1]
+            print(f"True Leak ref={ref} typ={typ} left={left} mid={mid} right={right}")
+
+
+class TestDemo(unittest.TestCase):
+
+    @unittest.skipUnless(have_debug, 'You need a debug build with "--with-trace-refs"')
+    def test_demo(self):
+        # create a pseudo leak and a true leak
+        fake_leak_obj = []
+        true_leak_obj = []
+        lf = LeakFinder()
+        refs_before = sys.gettotalrefcount()
+        for idx in range(100):
+            fake_leak_obj.append("same string")
+            true_leak_obj.append(idx + 1000)    # avoiding cached low numbers
+        refs_after = sys.gettotalrefcount()
+        lf.find_leak()
+        self.assertNotAlmostEqual(refs_after - refs_before, 0, delta=10)
+
+
+if __name__ == "__main__":
+    unittest.main()
author	Christian Tismer <tismer@stackless.com>	2021-01-10 18:50:39 +0100
committer	Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>	2021-01-11 15:48:36 +0000
commit	5a90b97f79c3062149bed49ab45b745ef848c749 (patch)
tree	04612e29897bf2f02618ed04eac29f10f5f8bcbf
parent	72485050f28e3d9f6f99c2f9d112d451f5066d4e (diff)