1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
from dbaccess import execQuery
from statlib import stats
from misc import metricIdToLowerIsBetter, getContext
class SingleContextBMStats:
    """Container for the statistics of one benchmark/metric combination
    in a single context.

    Every constructor argument is stored unchanged in an attribute of the
    same name; the class performs no computation or validation itself.
    Instances are built by extractSingleContextBMStats().
    """

    def __init__(
            self, ntotal, nvalid, min_, max_, median, mean, mean_is_zero,
            stddev, rsd, rse, hist, metric_id, lower_is_better, benchmark_id):
        # Sample counts: all results vs. results flagged as valid.
        self.ntotal, self.nvalid = ntotal, nvalid
        # Range and central tendency.
        self.min_, self.max_ = min_, max_
        self.median, self.mean = median, mean
        self.mean_is_zero = mean_is_zero
        # Dispersion: standard deviation, relative standard deviation and
        # relative standard error.
        self.stddev, self.rsd, self.rse = stddev, rsd, rse
        # Histogram (bin distribution in percentages).
        self.hist = hist
        # Identification of the metric and the benchmark.
        self.metric_id = metric_id
        self.lower_is_better = lower_is_better
        self.benchmark_id = benchmark_id
# Computes the bin distribution for a histogram where the bins form a regular
# partition of the [min, max] value range.
#
# Returns a tuple with nbins elements, where element 0 is the percentage
# (in the range [0, 100]) of the values that fall into the first bin etc.
#
# If all values are equal (i.e. min == max) or the number of values is zero,
# the function returns a tuple with nbins elements that all have a value of -1.
def createHistogram(nbins, values):
    """Return the percentage of values that fall into each of nbins bins.

    The bins form a regular partition of the [min(values), max(values)]
    range. The maximum value itself is counted in the last bin.

    Args:
        nbins: Number of bins; must be greater than zero.
        values: Sequence of numbers. Its length is taken and it is
            iterated more than once, so it must not be a one-shot iterator.

    Returns:
        A tuple with nbins elements, where element 0 is the percentage (in
        the range [0, 100]) of the values that fall into the first bin and
        so on. If values is empty or all values are equal (min == max),
        every element of the tuple is -1 instead.
    """
    assert nbins > 0

    unavailable = (-1,) * nbins
    nvalues = len(values)
    if nvalues == 0:
        return unavailable

    min_ = min(values)
    # float() keeps the division below a true division for integer input.
    span = float(max(values) - min_)
    if span == 0.0:
        return unavailable

    freq = [0] * nbins
    for val in values:
        # Divide by the span directly rather than multiplying by a
        # precomputed reciprocal: for a very small span the reciprocal
        # overflows to infinity and 0 * inf yields NaN, which int() rejects.
        frac = (val - min_) / span
        i = int(frac * nbins)
        assert i >= 0
        # frac == 1.0 (the maximum value) maps to index nbins; clamp it
        # into the last bin.
        freq[min(i, nbins - 1)] += 1

    return tuple(100.0 * count / nvalues for count in freq)
# Extracts stats for a given benchmark/metric/context combination.
# Returns a SingleContextBMStats object.
def extractSingleContextBMStats(context_id, benchmark_id, metric_id, nbins=10):
    """Extract stats for a given benchmark/metric/context combination.

    Queries the result table for all rows matching the three ids and
    computes the statistics over the values of the rows flagged as valid.

    Args:
        context_id: Integer id of the context (formatted with %d).
        benchmark_id: Integer id of the benchmark (formatted with %d).
        metric_id: Integer id of the metric (formatted with %d).
        nbins: Number of histogram bins. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        A SingleContextBMStats object where
          - ntotal is the number of rows returned by the query and nvalid
            the number of those flagged as valid;
          - min_, max_, median and mean are -1.0 if there are no valid
            values;
          - stddev is -1.0 if there are fewer than two valid values;
          - rsd and rse (both percentages of the absolute mean) are -1.0
            if there are fewer than two valid values or if dividing by the
            mean raised ZeroDivisionError, in which case mean_is_zero is
            True;
          - hist is the result of createHistogram(nbins, <valid values>).
    """
    query_result = execQuery("SELECT value, valid FROM result"
                             " WHERE contextId = %d"
                             " AND benchmarkId = %d"
                             " AND metricId = %d;"
                             % (context_id, benchmark_id, metric_id))
    values = [value for value, valid in query_result if valid]

    # -1.0 marks a statistic that could not be computed.
    min_ = max_ = median = mean = stddev = rsd = rse = -1.0
    mean_is_zero = False

    if values:
        # Compute stats for the valid values:
        min_ = min(values)
        max_ = max(values)
        median = stats.medianscore(values)
        mean = float(stats.mean(values))
        if len(values) > 1:
            stddev = stats.stdev(values)  # standard deviation
            try:
                # relative standard deviation:
                rsd = 100 * abs(stddev / mean)
                # relative standard error; abs() as for rsd, so that a
                # negative mean cannot yield a negative result that could
                # be mistaken for the -1.0 "not available" marker:
                rse = 100 * abs(stats.sem(values) / mean)
            except ZeroDivisionError:
                mean_is_zero = True

    hist = createHistogram(nbins, values)

    return SingleContextBMStats(
        len(query_result), len(values), min_, max_, median, mean,
        mean_is_zero, stddev, rsd, rse, hist, metric_id,
        metricIdToLowerIsBetter(metric_id), benchmark_id)
|