src/bm/asfstats.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** Contact: Qt Software Information (qt-info@nokia.com)
**
** This file is part of the BM project on Qt Labs.
**
** This file may be used under the terms of the GNU General Public
** License version 2.0 or 3.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of
** this file.  Please review the following information to ensure GNU
** General Public Licensing requirements will be met:
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html.
**
** If you are unsure which license is appropriate for your use, please
** contact the sales department at qt-sales@nokia.com.
**
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
**
****************************************************************************/

#include "asfstats.h"
#include "bmmisc.h"
#include <QList>
#include <QMap>
#include <QDebug>

/* NOTES

Glossary:

- RH: Result history.


Params:

- diffTolerance (in percent: 0 <= x <= 100)
- stabTolerance (a positive integer: x >= 2)

- sfTolerance (in percent)   - the lowest tolerable SF (i.e. at or above is considered stable)
- lfTolerance (in percent)   - the lowest tolerable LF (i.e. at or above is considered stable)
- maxLDTolerance(in percent) - the highest tolerable maxLD (i.e. at or below is considered stable)

-------------------------------------------------

Def: An equality subsequence (ESS) is a subsequence v1, v2, ..., vn of a RH
     for which the following condition holds:

         ∀ i >= 1 : 100 * (max(vi, v1) / min(vi, v1) - 1) <= diffTolerance


Def: A maximal equality subsequence (MaxESS) is one of the subsequences formed by
     partitioning a RH into the smallest possible number of ESS'es.


Def: The stability fraction (SF) of a RH is the fraction (given as a
     percentage: 0 <= SF <= 100) of its MaxESS'es that are stable.
     More precisely,

         SF = 100 * (stableMaxESS / totalMaxESS),

     where stableMaxESS is the number of MaxESS'es that have a length of at least
     stabTolerance and totalMaxESS is the total number of MaxESS'es.

Def: The level fraction (LF) of a RH is the fraction (given as a percentage: 0 <= LF <= 100)
     of its levels that are unique (distinct).
     More precisely,

         LF = 100 * (uniqueLevels / totalMaxESS)

     where uniqueLevels is the number of unique levels and totalMaxESS is the total number of
     MaxESS'es. Note that each MaxESS has a level (which is currently defined as its first value).
     Example: A RH with levels 2, 10, 2, 10, 2 has a LF of 40% (2 of 5 levels are unique),
     whereas one with levels 2, 11, 3, 12, 4 has a LF of 100% (5 of 5 levels are unique).

Def: The maximum level distance (maxLD) of a RH is the relative difference (given as a percentage;
     maxLD >= 0, with no upper bound) between the highest and lowest level.
     More precisely:

         maxLD = 100 * ((maxLevel / minLevel) - 1)

-------

Step 1: Identifying unstable RHs

  A RH is considered unstable if at least one of the following conditions is true.
  Except for the second condition, the conditions are evaluated for the raw
  (i.e. unsmoothed) data.

    Cond 1: At least one zero value occurs
    Cond 2: The RH doesn't have a _smoothed_ value before fromTimestamp.
    Cond 3: SF < sfTolerance
    Cond 4: LF < lfTolerance
    Cond 5: maxLD > maxLDTolerance

  Stats produced:
    Stat 1: Count for Cond 1
    Stat 2: Count for Cond 2
    Stat 3: Count for Cond 3
    Stat 4: Count for Cond 4
    Stat 5: Count for Cond 5
    Stat 6: Total unstable count

  Other output:
    The list of BM context IDs of the unstable RHs.


Step 2: Sample the stable and smoothed RHs at fromTimestamp and toTimestamp and compare the
        difference.
        For each RH, the two values to be compared - v1 and v2 - are the latest smoothed
        values that are not later than fromTimestamp and toTimestamp respectively.
        If 100 * (max(v1, v2) / min(v1, v2) - 1) <= diffTolerance, the values are considered equal.
        Otherwise, the diff is computed as v1 - v2 for a "lower is better" metric or v2 - v1
        for a "higher is better" metric. If diff < 0, the RH regressed, otherwise it improved.

  Stats produced:
    Stat 1: Regressed count.
    Stat 2: Unchanged count.
    Stat 3: Improved count.

*/


// ### 2 B DOCUMENTED!
void ASFStats::compute(const QList<ResultHistoryInfo *> &rhInfos, StatsInfo *statsInfo)
{
    Q_ASSERT((fromTimestamp >= 0) && (fromTimestamp <= toTimestamp));
    Q_ASSERT(diffTolerance >= 0.0);
    Q_ASSERT(stabTolerance >= 1);
    Q_ASSERT((sfTolerance >= 0.0) && (sfTolerance <= 100.0));
    Q_ASSERT((lfTolerance >= 0.0) && (lfTolerance <= 100.0));
    
    statsInfo->regressed.fill(false, rhInfos.size());
    statsInfo->unchanged.fill(false, rhInfos.size());
    statsInfo->improved.fill(false, rhInfos.size());
    statsInfo->unstable.fill(false, rhInfos.size());
    statsInfo->usZero.fill(false, rhInfos.size());
    statsInfo->usLowFromPos.fill(false, rhInfos.size());
    statsInfo->usLowSF.fill(false, rhInfos.size());
    statsInfo->usLowLF.fill(false, rhInfos.size());
    statsInfo->usHighMaxLD.fill(false, rhInfos.size());

    for (int i = 0; i < rhInfos.size(); ++i) {
        bool zerosFound = false;
        int total = 0;
        int stable = 0;
        int uniqueLevels = 0;
        qreal minLevel = 0;
        qreal maxLevel = 0;

        // Compute stability stats for raw (unsmoothed) data ...
        rhInfos.at(i)->computeStabilityStats(
            diffTolerance, stabTolerance, fromTimestamp, toTimestamp, &zerosFound, &total,
            &stable, &uniqueLevels, &minLevel, &maxLevel);

        Q_ASSERT(total > 0);
        const qreal sf = 100 * (stable / qreal(total));
        const qreal lf = 100 * (uniqueLevels / qreal(total));

        const qreal maxLD = zerosFound ? -1 : (100 * ((maxLevel / minLevel) - 1));

        int fromPos = -1;
        rhInfos.at(i)->findSmoothPos(fromTimestamp, &fromPos);

        bool unstable = false;
        if (zerosFound) {
            unstable = true;
            statsInfo->usZero.setBit(i);
        }
        if (fromPos == -1) {
            unstable = true;
            statsInfo->usLowFromPos.setBit(i);
        }
        if (sf < sfTolerance) {
            unstable = true;
            statsInfo->usLowSF.setBit(i);
        }
        if (lf < lfTolerance) {
            unstable = true;
            statsInfo->usLowLF.setBit(i);
        }
        if (maxLD > maxLDTolerance) {
            unstable = true;
            statsInfo->usHighMaxLD.setBit(i);
        }

        if (unstable) {

            // RH is unstable, so mark it as such ...
            statsInfo->unstable.setBit(i);

        } else {
            // RH is stable, so sample the smoothed values at fromTimestamp and toTimestamp and
            // classify the difference as 'regressed', 'unchanged', or 'improved' ...

            int toPos = -1;
            const bool ok = rhInfos.at(i)->findSmoothPos(toTimestamp, &toPos);
            Q_ASSERT(ok);

            const qreal v1 = rhInfos.at(i)->value(fromPos);
            const qreal v2 = rhInfos.at(i)->value(toPos);
            if ((100 * (qMax(v1, v2) / qMin(v1, v2) - 1)) <= diffTolerance) {
                statsInfo->unchanged.setBit(i);
            } else {
                const qreal diff =
                    BMMisc::lowerIsBetter(rhInfos.at(i)->metric()) ? (v1 - v2) : (v2 - v1);
                if (diff < 0)
                    statsInfo->regressed.setBit(i);
                else
                    statsInfo->improved.setBit(i);
            }
        }
    }
}