from dbaccess import execQuery
from statlib import stats
from misc import metricIdToLowerIsBetter, getContext


class SingleContextBMStats:
    """Plain record of the statistics computed for one benchmark/metric
    combination in a single context.

    The constructor stores every argument unchanged in an attribute of the
    same name.
    """

    def __init__(
            self, ntotal, nvalid, min_, max_, median, mean, mean_is_zero,
            stddev, rsd, rse, hist, metric_id, lower_is_better,
            benchmark_id):
        # Sample counts.
        self.ntotal = ntotal
        self.nvalid = nvalid

        # Summary statistics.
        self.min_ = min_
        self.max_ = max_
        self.median = median
        self.mean = mean
        self.mean_is_zero = mean_is_zero
        self.stddev = stddev
        self.rsd = rsd
        self.rse = rse

        # Histogram (bin distribution in percentages).
        self.hist = hist

        # Identification of the metric and benchmark.
        self.metric_id = metric_id
        self.lower_is_better = lower_is_better
        self.benchmark_id = benchmark_id


# Computes the bin distribution of a histogram whose nbins bins split the
# [min, max] range of the given values into equally wide intervals.
#
# The result is a tuple of nbins elements; element 0 holds the percentage
# (in the range [0, 100]) of the values falling into the first bin, element 1
# the percentage for the second bin, and so on. A value equal to the maximum
# is counted in the last bin.
#
# When there are no values, or when all values are equal (min == max), every
# element of the returned tuple is -1.
def createHistogram(nbins, values):
    assert nbins > 0

    total = len(values)
    if total == 0:
        return (-1,) * nbins

    lowest = min(values)
    highest = max(values)
    try:
        scale = 1.0 / (highest - lowest)
    except ZeroDivisionError:
        # Zero-width value range: no meaningful partition exists.
        return (-1,) * nbins

    last_bin = nbins - 1
    counts = [0] * nbins
    for sample in values:
        position = (sample - lowest) * scale
        index = int(position * nbins)
        assert index >= 0
        # The maximum value maps to index nbins; fold it into the last bin.
        counts[min(index, last_bin)] += 1

    return tuple(100.0 * count / total for count in counts)


# Extracts stats for a given benchmark/metric/context combination.
# Returns a SingleContextBMStats object.
def extractSingleContextBMStats(context_id, benchmark_id, metric_id, nbins=10):
    """Compute statistics for one benchmark/metric/context combination.

    Queries the ``result`` table for all rows matching the three ids and
    computes the statistics over the values of the rows flagged as valid.

    Arguments:
        context_id: value matched against ``result.contextId``.
        benchmark_id: value matched against ``result.benchmarkId``.
        metric_id: value matched against ``result.metricId``.
        nbins: number of histogram bins (defaults to 10, the value that was
            previously hardcoded).

    Returns a SingleContextBMStats object. Statistics that could not be
    computed are reported as -1.0:
      * min, max, median and mean when there are no valid values;
      * stddev, rsd and rse when there are fewer than two valid values;
      * rsd and rse when the mean is zero (mean_is_zero is then True).
    """
    query_result = execQuery(
        "SELECT value, valid FROM result"
        " WHERE contextId = %s"
        " AND benchmarkId = %s"
        " AND metricId = %s;",
        (context_id, benchmark_id, metric_id))

    # Only rows flagged as valid contribute to the statistics.
    values = [value for value, valid in query_result if valid]

    # -1.0 marks a statistic as "not available".
    min_ = max_ = median = mean = stddev = rsd = rse = -1.0
    mean_is_zero = False

    if values:
        min_ = min(values)
        max_ = max(values)
        median = stats.medianscore(values)
        mean = float(stats.mean(values))

        # Dispersion measures need at least two samples.
        if len(values) > 1:
            stddev = stats.stdev(values)  # standard deviation
            try:
                # Relative standard deviation (percent of the mean).
                rsd = 100 * abs(stddev / mean)
                # Relative standard error (percent of the mean). Taken as
                # an absolute value, like rsd, so that a negative mean
                # cannot produce a negative result that would be confused
                # with the -1.0 "not available" marker.
                rse = 100 * abs(stats.sem(values) / mean)
            except ZeroDivisionError:
                mean_is_zero = True

    hist = createHistogram(nbins, values)

    return SingleContextBMStats(
        len(query_result), len(values), min_, max_, median, mean,
        mean_is_zero, stddev, rsd, rse, hist, metric_id,
        metricIdToLowerIsBetter(metric_id), benchmark_id)