# scripts/singlecontextbmstats.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

from dbaccess import execQuery
from statlib import stats
from misc import metricIdToLowerIsBetter, getContext

class SingleContextBMStats:
    # Plain record holding the statistics of one benchmark/metric
    # combination in a single context. The constructor performs no
    # validation or computation; every argument is stored unchanged under
    # an attribute of the same name.
    def __init__(
        self, ntotal, nvalid, min_, max_, median, mean, mean_is_zero,
        stddev, rsd, rse, hist, metric_id, lower_is_better, benchmark_id):
        # Sample counts.
        self.ntotal, self.nvalid = ntotal, nvalid

        # Range and central tendency.
        self.min_, self.max_ = min_, max_
        self.median, self.mean = median, mean
        self.mean_is_zero = mean_is_zero

        # Dispersion: standard deviation, relative standard deviation and
        # relative standard error.
        self.stddev, self.rsd, self.rse = stddev, rsd, rse

        # Histogram (bin distribution in percentages).
        self.hist = hist

        # What was measured, and how to interpret it.
        self.metric_id, self.benchmark_id = metric_id, benchmark_id
        self.lower_is_better = lower_is_better

# Computes the bin distribution for a histogram where the bins form a regular
# partition of the [min, max] value range.
#
# Every bin is half-open, i.e. a value that lies exactly on the boundary
# between two bins is counted in the upper one. The only exception is the
# maximum value, which is counted in the last bin.
#
# nbins must be a positive integer and values a sequence of numbers.
#
# Returns a tuple with nbins elements, where element 0 is the percentage
# (in the range [0, 100]) of the values that fall into the first bin etc.
#
# If all values are equal (i.e. min == max) or the number of values is zero,
# the function returns a tuple with nbins elements that all have a value of -1.
def createHistogram(nbins, values):
    assert nbins > 0

    nvalues = len(values)
    if nvalues == 0:
        return tuple([-1] * nbins)

    min_ = min(values)
    # Converting the span to float keeps the division below a true division
    # even when all values are integers.
    span = float(max(values) - min_)
    if span == 0:
        return tuple([-1] * nbins)

    freq = [0] * nbins
    for val in values:
        # Scale first and divide once. Multiplying by a precomputed
        # reciprocal (1.0 / span) adds a rounding step that can push a value
        # lying exactly on a bin boundary into the bin below it, and the
        # reciprocal overflows to infinity for extremely small spans.
        i = int((val - min_) * nbins / span)
        assert i >= 0
        # The maximum value maps to index nbins; fold it into the last bin.
        freq[min(i, nbins - 1)] += 1

    return tuple([100.0 * count / nvalues for count in freq])

# Extracts stats for a given benchmark/metric/context combination.
#
# context_id, benchmark_id and metric_id are formatted into the SQL query
# with %d. nbins is the number of histogram bins passed to createHistogram()
# and defaults to 10.
#
# Only rows flagged as valid contribute to the statistics. Statistics that
# cannot be computed keep the placeholder value -1.0:
#   - min, max, median and mean require at least one valid value;
#   - stddev requires at least two valid values;
#   - rsd and rse additionally require a non-zero mean.
# rsd and rse are percentages relative to the absolute value of the mean, so
# a computed value is never negative and cannot be mistaken for the
# placeholder.
#
# mean_is_zero is True whenever there is at least one valid value and the
# mean of the valid values is zero.
#
# Returns a SingleContextBMStats object.
def extractSingleContextBMStats(context_id, benchmark_id, metric_id, nbins=10):

    # The result of execQuery() is iterated as (value, valid) pairs here and
    # passed to len() further down.
    query_result = execQuery("SELECT value, valid FROM result"
        " WHERE contextId = %d"
        " AND benchmarkId = %d"
        " AND metricId = %d;"
            % (context_id, benchmark_id, metric_id))

    values = [value for value, valid in query_result if valid]

    min_ = max_ = median = mean = stddev = rsd = rse = -1.0
    mean_is_zero = False

    if values:
        # Compute stats for the valid values:
        min_ = min(values)
        max_ = max(values)
        median = stats.medianscore(values)
        mean = float(stats.mean(values))
        # Determined from the mean itself so that the flag is also correct
        # when there is only a single valid value.
        mean_is_zero = (mean == 0)
        if len(values) > 1:
            stddev = stats.stdev(values) # standard deviation
            if not mean_is_zero:
                # relative standard deviation:
                rsd = 100 * abs(stddev / mean)
                # relative standard error (abs() for consistency with rsd):
                rse = 100 * abs(stats.sem(values) / mean)

    hist = createHistogram(nbins, values)

    return SingleContextBMStats(
        len(query_result), len(values), min_, max_, median, mean,
        mean_is_zero, stddev, rsd, rse, hist, metric_id,
        metricIdToLowerIsBetter(metric_id), benchmark_id)