#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of Qt for Python.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################

"""
leak_finder.py
==============

This script finds memory leaks in Python.

Usage:
------

Place one or more lines which should be tested for leaks in a loop:

    from leak_finder import LeakFinder
    ...
    lf = LeakFinder()
    for i in range(1000):
        leaking_statement()
    lf.find_leak()


Theory
------

How to find a leak?

We repeatedly perform an action and observe if that has an unexpected
side effect. There are typically two observations:

* one object is growing its refcount (a pseudo-leak)
* we get many new objects of one type (a true leak)

A difficulty in trying to get leak info is avoiding side effects
of the measurement. Early attempts with lists of refcounts were
unsuccessful. Using array.array for counting refcounts avoids that.


Algorithm
---------
We record a snapshot of all objects in a list and a parallel array
of refcounts.

Then we do some computation and do the same snapshot again.

The list of all objects grows at the front for some reason. That makes
the captured structures easy to compare.
We reverse that list and array and then have for the objects:

    len(all2) >= len(all1)

    all1[idx] == all2[idx] for idx in range(len(all1))

When taking the second snapshot, the objects still have references from
the first snapshot.
For objects with no effect, the following relation is true:

    refs1[idx] == refs2[idx] - 1 for idx in range(len(all1))

All other objects are potential pseudo-leaks: they waste references,
but they do not create new objects.

Then we look at the newly created objects:
These objects are real leaks if their number is growing with the probe
size. For analysis, the number of new objects per type is counted.
"""

import sys
import gc
import array
import unittest

# this comes from Python, too
from test import support

# sys.getobjects only exists in special interpreter builds. Remember whether
# it is available so that the demo test below can be skipped otherwise.
have_debug = hasattr(sys, "getobjects")


class LeakFinder(object):
    """Detect reference leaks and object leaks between two points in time.

    Creating an instance records a snapshot of all objects together with
    their reference counts.  ``find_leak()`` records a second snapshot,
    compares it with the first one and prints the findings.

    The interpreter must provide ``sys.getobjects``.
    """

    def __init__(self):
        # First snapshot; it is consumed (and released) by find_leak().
        self.all, self.refs = self._make_snapshot()

    @staticmethod
    def _make_snapshot():
        """Return ``(objects, refcounts)`` for all objects known to Python.

        ``objects`` is a list and ``refcounts`` a parallel ``array.array``.
        Both are reversed before they are returned so that two snapshots
        start with the same entries and can be compared index by index.
        """
        gc.collect()
        # get all objects
        objects = sys.getobjects(0)
        # The refcounts are stored in an array: according to the module
        # documentation, lists of refcounts disturbed the measurement.
        getrefcount = sys.getrefcount
        refs = array.array("l", (getrefcount(obj) for obj in objects))
        # the lists have the same ending. Make comparison easier.
        objects.reverse()
        refs.reverse()
        return objects, refs

    @staticmethod
    def _short_repr(x, limit=76):
        """Return ``repr(x)``, cut after *limit* characters plus "..."."""
        s = repr(x)
        if len(s) > limit:
            s = s[:limit] + "..."
        return s

    def find_leak(self, *, threshold=10):
        """Take a second snapshot, compare it with the first and print leaks.

        Two kinds of findings are printed:

        * "Fake Leak": an object from the first snapshot whose reference
          count changed by more than *threshold*.
        * "True Leak": a type with more than *threshold* new instances.
          The first, middle and last instance (after sorting, when the
          instances can be sorted) are shown as shortened samples.

        The first snapshot is removed from the instance by this call, so
        ``find_leak()`` can be used only once per ``LeakFinder``.

        :param threshold: differences up to this size are ignored as noise.
        """
        all1 = self.all
        refs1 = self.refs
        del self.all, self.refs
        # all1 must stay alive while the second snapshot is taken; this is
        # the one extra reference per object that is subtracted below.
        all2, refs2 = self._make_snapshot()
        common = len(all1)
        del all1

        srepr = self._short_repr
        # look into existing objects for changed refcounts;
        # zip() stops at the end of refs1, i.e. after the common objects
        first = True
        for obj, before, after in zip(all2, refs1, refs2):
            ref = after - before - 1
            if abs(ref) <= threshold:
                continue
            if first:
                # blank line ahead of the first finding of this kind
                print()
                first = False
            print(f"Fake Leak ref={ref} obj={srepr(obj)}")

        # group the extra objects by type
        by_type = {}
        for obj in all2[common:]:
            by_type.setdefault(type(obj), []).append(obj)

        first = True
        for typ, instances in by_type.items():
            count = len(instances)
            if count <= threshold:
                continue
            try:
                instances.sort()
            except TypeError:
                # instances cannot be ordered: sample the list as it is
                pass
            if first:
                print()
                first = False
            # shorten the samples like the fake leaks above, so that large
            # objects cannot flood the report
            left, mid, right = (srepr(instances[pos])
                                for pos in (0, count // 2, -1))
            print(f"True Leak ref={count} typ={typ} left={left} mid={mid} right={right}")


class TestDemo(unittest.TestCase):
    """Demonstration of LeakFinder with one pseudo-leak and one true leak."""

    @unittest.skipUnless(have_debug, 'You need a debug build with "--with-trace-refs"')
    def test_demo(self):
        # containers which keep the leaked references and objects alive
        pseudo_leak = []
        real_leak = []
        finder = LeakFinder()
        total_before = sys.gettotalrefcount()
        for number in range(100):
            # pseudo leak: the same string gets one more reference each time
            pseudo_leak.append("same string")
            # true leak: the offset avoids the cached low numbers
            real_leak.append(number + 1000)
        total_after = sys.gettotalrefcount()
        finder.find_leak()
        # the loop must have changed the total refcount by more than 10
        self.assertNotAlmostEqual(total_after - total_before, 0, delta=10)


# Run the demo test case when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()