testing/blacklist.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

#############################################################################
##
## Copyright (C) 2017 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of PySide2.
##
## $QT_BEGIN_LICENSE:LGPL$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU Lesser General Public License Usage
## Alternatively, this file may be used under the terms of the GNU Lesser
## General Public License version 3 as published by the Free Software
## Foundation and appearing in the file LICENSE.LGPL3 included in the
## packaging of this file. Please review the following information to
## ensure the GNU Lesser General Public License version 3 requirements
## will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 2.0 or (at your option) the GNU General
## Public license version 3 or any later version approved by the KDE Free
## Qt Foundation. The licenses are as published by the Free Software
## Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-2.0.html and
## https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################

from __future__ import print_function

from .helper import decorate, StringIO
from .buildlog import builds


class BlackList(object):
    """In-memory view of a simplified blacklist file.

    Attributes:
        raw_data: the lines of the file exactly as read ([] when no file
            name was given).
        tests: maps a section name to the list of its entries. Each entry
            is the list of whitespace-separated keys found on one line.
            Lines that precede the first [section] heading form the
            global section, which is stored under the empty name ''.
        index: maps a section name to the index (into raw_data) of its
            [section] heading line. The global section has no entry.
    """

    def __init__(self, blname):
        """Read and parse the blacklist file.

        blname -- path of the blacklist file, or None for an empty blacklist.
        """
        # Both mappings always exist, so users of an empty blacklist can
        # still look at `tests` and `index` without an AttributeError.
        self.tests = {}
        self.index = {}

        if blname is None:
            self.raw_data = []
        else:
            with open(blname) as f:
                self.raw_data = f.readlines()

        def filtered_line(line):
            # a comment reaches from '#' to the end of the line
            return line.partition('#')[0].split()

        def is_test(fline):
            # a section heading is a line whose first word starts with "["
            return bool(fline) and fline[0].startswith("[")

        name = None  # no section has been opened yet
        for idx, line in enumerate(self.raw_data):
            fline = filtered_line(line)
            if not fline:
                # blank or comment-only line: kept in raw_data, not relevant
                continue
            if is_test(fline):
                # a new name; strip the surrounding brackets
                name = decorate(fline[0][1:-1])
                self.tests[name] = []
                self.index[name] = idx
            else:
                if name is None:
                    # entries before the first heading: the global section
                    name = ''
                    self.tests[name] = []
                # a known name with a new entry
                self.tests[name].append(fline)

    def find_matching_line(self, test):
        """Return the first blacklist entry that applies to *test*, or None.

        An entry applies when every key on its line is contained in
        builds.classifiers. The global section is searched first, then the
        section named test.mod_name, or, if there is no such section, the
        section named decorate(test.mod_name).

        This method only locates the entry; it does not look at whether
        the test passed. (Presumably the caller turns a match into BPASS
        or BFAIL depending on the test outcome.)
        """
        classifiers = set(builds.classifiers)

        def first_match(section):
            # entries are non-empty key lists, so None is unambiguous
            for line in self.tests.get(section, ()):
                if set(line) <= classifiers:
                    return line
            return None

        # this is the global section
        found = first_match("")
        if found is not None:
            return found

        mod_name = test.mod_name
        if mod_name in self.tests:
            return first_match(mod_name)
        return first_match(decorate(mod_name))


"""
Simplified blacklist file
-------------------------

A comment reaches from '#' to the end of line.
The file starts with an optional global section.
A test is started with a [square-bracketed] section name.
A line matches if all keys in the line are found.
If a line matches, the corresponding test is marked BFAIL or BPASS, depending on whether the
test passed or not.

Known keys are:

darwin
win32
linux
...

qt5.6.1
qt5.6.2
...

py3
py2

32bit
64bit

debug
release
"""

"""
Data Folding v2
===============

In the first layout of data folding, we distinguished complete domains
like "debug/release" and incomplete domains like "ubuntu/win32" which
can be extended to any number.

This version is simpler. We do a first pass over all data and collect
all data. Therefore, incomplete domains do not exist. The definition
of the current members of the domain goes into a special comment at
the beginning of the file.


Compressing a blacklist
-----------------------

When we have many samples of data, it is very likely to get very similar
entries. The redundancy is quite high, and we would like to compress
data without losing information.

Consider the following data set:

[some::sample_test]
    darwin qt5.6.1 py3 64bit debug
    darwin qt5.6.1 py3 64bit release
    darwin qt5.6.1 py2 64bit debug
    darwin qt5.6.1 py2 64bit release
    win32 qt5.6.1 py3 64bit debug
    win32 qt5.6.1 py3 64bit release
    win32 qt5.6.1 py2 64bit debug
    win32 qt5.6.1 py2 64bit release

The keys "debug" and "release" build the complete set of keys in their
domain. When sorting the lines, we can identify all similar entries which
are only different by the keys "debug" and "release".

[some::sample_test]
    darwin qt5.6.1 py3 64bit
    darwin qt5.6.1 py2 64bit
    win32 qt5.6.1 py3 64bit
    win32 qt5.6.1 py2 64bit

We can do the same for "py3" and "py2", because we have again the complete
set of possible keys available:

[some::sample_test]
    darwin qt5.6.1 64bit
    win32 qt5.6.1 64bit

The operating system has the current keys "darwin" and "win32".
They are kept in a special comment, and we get:

# COMPRESSION: darwin win32
[some::sample_test]
    qt5.6.1 64bit


Expanding a blacklist
---------------------

All of the above steps are completely reversible.


Alternate implementation
------------------------

Instead of using a special comment, I am currently in favor of
the following:

The global section gets the complete set of variables, like so

# Globals
    darwin win32 linux
    qt5.6.1 qt5.6.2
    py3 py2
    32bit 64bit
    debug release
[some::sample_test]
    qt5.6.1 64bit

This approach has the advantage that it does not depend on comments.
The lines in the global section can always be added without any conflict,
because these test results are impossible. Therefore, we list all our
keys without adding anything that could influence a test.
It also makes sense to have everything explicitly listed here.
"""

def learn_blacklist(fname, result, selected):
    """Update the blacklist file *fname* from the test results in *result*.

    Three steps run in order on the same file: the entries of the current
    build are removed, the current results are added, and the header line
    is rewritten using *selected*.
    """
    # Opening with "r+" raises right away if the file does not exist; the
    # helpers below reopen the file by name for their own reads and writes.
    with open(fname, "r+") as handle:
        blname = handle.name
        _remove_from_blacklist(blname)
        _add_to_blacklist(blname, result)
        _update_header(blname, selected)

def _remove_from_blacklist(old_blname):
    """Strip the entries of the current build from the file *old_blname*.

    The file is rewritten in place after three clean-up steps:
      1. entry lines whose keys (ignoring a trailing comment) are exactly
         the set builds.classifiers are dropped; other entries stay;
      2. blank lines are dropped, comment lines are kept;
      3. a section heading is dropped when it is the last line or when the
         next remaining line is another heading.
    """
    current = set(builds.classifiers)

    def is_current_entry(text):
        # Compare only the keys left of any '#'; heading lines never count.
        keys = text.partition('#')[0].split()
        if not keys or '[' in keys[0]:
            return False
        return set(keys) == current

    def is_heading(text):
        words = text.split()
        return bool(words) and words[0].startswith("[")

    with open(old_blname, "r") as f:
        content = f.readlines()

    # steps 1 and 2: keep non-blank lines that are not an exact match
    content = [text for text in content
               if text.split() and not is_current_entry(text)]

    # step 3: walk backwards so that each heading can be judged against
    # the line that will actually follow it in the result
    kept = []  # surviving lines, in reverse order
    for text in reversed(content):
        if is_heading(text) and (not kept or is_heading(kept[-1])):
            continue
        kept.append(text)
    kept.reverse()

    with open(old_blname, "w") as f:
        f.writelines(kept)

def _add_to_blacklist(old_blname, result):
    """Record the current classifiers for every failed test in *result*.

    old_blname -- path of the blacklist file; it is parsed with BlackList
                  and then rewritten.
    result     -- iterable of test results; each item provides the
                  attributes `passed` and `mod_name`.

    For a failed test whose section already exists (under its plain or
    its decorated name), one line holding builds.classifiers is inserted
    directly after the section heading. For any other failed test a new
    "[decorated name]" section with that line is appended to the file.
    Each section receives the line at most once per call.
    """
    # insert new classifiers
    classifiers = "    " + " ".join(builds.classifiers) + "\n"
    old_bl = BlackList(old_blname)
    lines = old_bl.raw_data[:]
    if lines and not lines[-1].endswith("\n"):
        # make sure appended sections start on a line of their own
        lines[-1] += "\n"

    insertions = set()  # indexes of heading lines that get a new entry
    additions = []      # heading lines of brand-new sections, in order
    for test in result:
        if test.passed:
            continue
        decorated = decorate(test.mod_name)
        if test.mod_name in old_bl.tests:
            # the test is already there, add to the first line
            insertions.add(old_bl.index[test.mod_name])
        elif decorated in old_bl.tests:
            # the same, but the name was decorated
            insertions.add(old_bl.index[decorated])
        else:
            # the test is new, append it to the end (only once)
            heading = "[" + decorated + "]\n"
            if heading not in additions:
                additions.append(heading)

    # Appending to the heading's own string keeps all indexes stable.
    for idx in insertions:
        lines[idx] += classifiers
    for heading in additions:
        lines.append(heading)
        lines.append(classifiers)

    # now write the data out; "w" truncates, like the sibling helpers do
    with open(old_blname, "w") as f:
        f.writelines(lines)

def _update_header(old_blname, selected):
    """Replace the header line at the top of the file *old_blname*.

    old_blname -- path of the blacklist file, rewritten in place.
    selected   -- object whose `log_dir` attribute is a path; its base
                  name is written into the header as the test date.

    Every comment line (first word starts with '#') that contains all of
    builds.classifiers is removed, then a fresh line of the form
    "### test date = <base>   classifiers = <classifiers>" is inserted as
    the first line.
    """
    # The module's import block does not provide `os`, so import it here.
    import os

    with open(old_blname) as f:
        lines = f.readlines()

    classifierset = set(builds.classifiers)

    def is_old_header(line):
        # a comment line that mentions every current classifier
        words = line.split()
        return (bool(words) and words[0].startswith('#')
                and set(words) >= classifierset)

    lines = [line for line in lines if not is_old_header(line)]

    classifiers = " ".join(builds.classifiers)
    base = os.path.basename(selected.log_dir)
    header = '### test date = %s   classifiers = %s\n' % (base, classifiers)
    lines.insert(0, header)

    with open(old_blname, "w") as f:
        f.writelines(lines)