import sys
import json
from dbaccess import execQuery, database
from singlecontextbmstats import (
    SingleContextBMStats, extractSingleContextBMStats)
import math
from statlib import stats
from misc import (
    idToText, textToId, benchmarkToComponents, getContext, printJSONHeader)


# Multiplying a natural logarithm by this factor converts it to base 2.
_LOG2_FACTOR = 1.0 / math.log(2)


class GetStats2:
    """Compares benchmark statistics between two contexts.

    A context is identified by a (host, platform, branch, sha1) combination.
    Calling execute() computes the per-benchmark and summary statistics and
    then calls self.writeOutput(), which this class does not define itself;
    it has to be supplied by a subclass (see GetStats2AsJSON).
    """

    def __init__(
        self, host1, platform1, branch1, sha11, host2, platform2, branch2,
        sha12, test_case_filter):
        """Stores the textual context components and looks up their ids.

        test_case_filter is either None (no filtering) or a container of
        test case names; only benchmarks whose test case is in the container
        are included in the comparison.
        """
        self.host1 = host1
        self.platform1 = platform1
        self.branch1 = branch1
        self.sha11 = sha11
        self.host2 = host2
        self.platform2 = platform2
        self.branch2 = branch2
        self.sha12 = sha12
        self.test_case_filter = test_case_filter

        self.host1_id = textToId("host", host1)
        self.platform1_id = textToId("platform", platform1)
        self.branch1_id = textToId("branch", branch1)
        self.sha11_id = textToId("sha1", sha11)
        self.host2_id = textToId("host", host2)
        self.platform2_id = textToId("platform", platform2)
        self.branch2_id = textToId("branch", branch2)
        self.sha12_id = textToId("sha1", sha12)

    def execute(self):
        """Computes all statistics and writes them via self.writeOutput()."""
        self.overall_stats = self.computeOverallStats()
        self.bmstats_list = self.computeBMStatsList()

        self.per_bm_stats = []
        self.test_case_exists = {}
        median_improv_list = []

        for bmstats1, bmstats2 in self.bmstats_list:
            row, median_improv, test_case = self._perBMStatsRow(
                bmstats1, bmstats2)
            if median_improv is not None:
                median_improv_list.append(median_improv)
            # Register test case existence:
            if test_case not in self.test_case_exists:
                self.test_case_exists[test_case] = True
            self.per_bm_stats.append(row)

        self._computeMedianImprovSummary(median_improv_list)

        self.writeOutput()

    def _perBMStatsRow(self, bmstats1, bmstats2):
        """Builds the output row for one benchmark/metric combination.

        Returns a 3-tuple (row, median_improv, test_case) where 'row' is the
        list of strings appended to per_bm_stats, 'median_improv' is the
        log2 improvement of the median from context 1 to context 2 (None if
        it cannot be computed), and 'test_case' is the test case name of the
        benchmark. Values that cannot be computed are left as empty strings
        in the row.
        """
        s_median1 = s_median2 = s_median_improv = ""
        median_improv = None
        if bmstats1.nvalid > 0 and bmstats2.nvalid > 0:
            s_median1 = str(bmstats1.median)
            s_median2 = str(bmstats2.median)
            # The logarithm of the ratio is only defined for positive medians.
            if bmstats1.median > 0 and bmstats2.median > 0:
                # Orient the ratio so that a positive result always means
                # that context 2 is better than context 1.
                if bmstats1.lower_is_better:
                    ratio = bmstats1.median / float(bmstats2.median)
                else:
                    ratio = bmstats2.median / float(bmstats1.median)
                median_improv = math.log(ratio) * _LOG2_FACTOR
                s_median_improv = str(median_improv)

        # The RSE is only reported for more than one valid observation and a
        # non-zero mean.
        has_rse1 = bmstats1.nvalid > 1 and not bmstats1.mean_is_zero
        has_rse2 = bmstats2.nvalid > 1 and not bmstats2.mean_is_zero
        s_rse1 = str(bmstats1.rse) if has_rse1 else ""
        s_rse2 = str(bmstats2.rse) if has_rse2 else ""
        s_rse_improv = ""
        if has_rse1 and has_rse2:
            s_rse_improv = str((bmstats1.rse - bmstats2.rse) / 100.0)

        s_metric = idToText("metric", bmstats1.metric_id)
        s_lower_is_better = "1" if bmstats1.lower_is_better else "0"
        benchmark = idToText("benchmark", bmstats1.benchmark_id)
        s_test_case, s_test_function, s_data_tag = (
            benchmarkToComponents(benchmark))

        row = [
            s_median1, s_median2, s_median_improv,
            s_rse1, s_rse2, s_rse_improv,
            s_metric, s_lower_is_better, benchmark
        ]
        return row, median_improv, s_test_case

    def _computeMedianImprovSummary(self, median_improv_list):
        """Stores summary statistics of the median improvements on self.

        All summary attributes are strings; they stay empty when
        'median_improv_list' is empty.
        """
        self.s_mean_of_median_improvs = ""
        self.s_perc_10_of_median_improvs = ""
        self.s_perc_25_of_median_improvs = ""
        self.s_perc_50_of_median_improvs = ""
        self.s_perc_75_of_median_improvs = ""
        self.s_perc_90_of_median_improvs = ""
        self.s_better_than_fact_1_1_count = ""
        self.s_worse_than_fact_inv_1_1_count = ""

        self.nmedian_improvs = len(median_improv_list)
        if self.nmedian_improvs == 0:
            return

        self.s_mean_of_median_improvs = str(stats.mean(median_improv_list))
        self.s_perc_10_of_median_improvs = str(
            self.valueForPercentile(median_improv_list, 10))
        self.s_perc_25_of_median_improvs = str(
            self.valueForPercentile(median_improv_list, 25))
        self.s_perc_50_of_median_improvs = str(
            self.valueForPercentile(median_improv_list, 50))
        self.s_perc_75_of_median_improvs = str(
            self.valueForPercentile(median_improv_list, 75))
        self.s_perc_90_of_median_improvs = str(
            self.valueForPercentile(median_improv_list, 90))
        # Thresholds are log2(1.1) and log2(1 / 1.1), i.e. the counts of
        # benchmarks that changed by more than a factor of 1.1 either way.
        self.s_better_than_fact_1_1_count = str(
            self.higherThanCount(
                median_improv_list, math.log(1.1) * _LOG2_FACTOR))
        self.s_worse_than_fact_inv_1_1_count = str(
            self.lowerThanCount(
                median_improv_list, math.log(1.0 / 1.1) * _LOG2_FACTOR))

    # Computes and returns a dictionary of overall statistics.
    def computeOverallStats(self):
        # TODO: not implemented yet; an empty dictionary is returned for now.
        # (The local is deliberately not called 'stats' to avoid shadowing
        # the imported statlib module.)
        overall_stats = {}
        return overall_stats

    # Returns the value below which 'p' % of the observations
    # in 'values' fall.
    def valueForPercentile(self, values, p):
        """Returns the element of sorted 'values' at percentile 'p'.

        The element is picked at index floor((n - 1) * p / 100 + 0.5) of the
        sorted observations (clamped to the valid range); p == 100 yields
        the maximum. 'values' must be non-empty and 'p' within [0, 100].
        The caller's list is left unmodified.
        """
        assert len(values) > 0
        assert p >= 0 and p <= 100
        if p == 100:
            return max(values)
        i = int(math.floor((len(values) - 1) * (p / 100.0) + 0.5))
        i = min(max(i, 0), len(values) - 1)
        # Sort a copy rather than reordering the caller's list in place.
        return sorted(values)[i]

    # Returns the number of observations in 'values' that are lower than 'v'.
    def lowerThanCount(self, values, v):
        return sum(1 for v_ in values if v_ < v)

    # Returns the number of observations in 'values' that are higher than 'v'.
    def higherThanCount(self, values, v):
        return sum(1 for v_ in values if v_ > v)

    # Computes per-benchmark statistics. Returns an n-tuple of
    # 2-tuples, each of which consists of the SingleContextBMStats objects for
    # context 1 and 2 respectively.
    def computeBMStatsList(self):
        context1_id = getContext(
            self.host1_id, self.platform1_id, self.branch1_id, self.sha11_id)
        context2_id = getContext(
            self.host2_id, self.platform2_id, self.branch2_id, self.sha12_id)

        # Get all distinct benchmark/metric combinations matching both
        # contexts:
        bmark_metrics = execQuery(
            "SELECT DISTINCT benchmarkId, metricId"
            " FROM result"
            " WHERE contextId = %s"
            " INTERSECT "
            "SELECT DISTINCT benchmarkId, metricId"
            " FROM result"
            " WHERE contextId = %s"
            #" LIMIT 10"
            ";",
            (context1_id, context2_id)
            )

        bmstats_list = []

        # Loop over benchmarks and compute stats for both contexts:
        for benchmark_id, metric_id in bmark_metrics:

            benchmark = idToText("benchmark", benchmark_id)
            test_case, test_function, data_tag = (
                benchmarkToComponents(benchmark))

            # Skip benchmarks whose test case is excluded by the filter:
            if ((self.test_case_filter is not None)
                and (test_case not in self.test_case_filter)):
                continue

            bmstats1 = extractSingleContextBMStats(
                context1_id, benchmark_id, metric_id)
            bmstats2 = extractSingleContextBMStats(
                context2_id, benchmark_id, metric_id)

            bmstats_list.append((bmstats1, bmstats2))

        return tuple(bmstats_list)

    def writeOutputAsJSON(self):
        """Writes the results computed by execute() as JSON to stdout.

        printJSONHeader() is called first (presumably it emits the response
        header preceding the JSON body -- verify against misc).
        """
        printJSONHeader()
        json.dump({
            'database': database(),
            'host1': self.host1,
            'platform1': self.platform1,
            'branch1': self.branch1,
            'sha11': self.sha11,
            'host2': self.host2,
            'platform2': self.platform2,
            'branch2': self.branch2,
            'sha12': self.sha12,
            'per_bm_stats': self.per_bm_stats,
            'nbenchmarks': len(self.bmstats_list),
            'ntestcases': len(self.test_case_exists),
            'nmedian_improvs': self.nmedian_improvs,
            'mean_of_median_improvs': self.s_mean_of_median_improvs,
            'perc_10_of_median_improvs': self.s_perc_10_of_median_improvs,
            'perc_25_of_median_improvs': self.s_perc_25_of_median_improvs,
            'perc_50_of_median_improvs': self.s_perc_50_of_median_improvs,
            'perc_75_of_median_improvs': self.s_perc_75_of_median_improvs,
            'perc_90_of_median_improvs': self.s_perc_90_of_median_improvs,
            'better_than_fact_1_1_count': self.s_better_than_fact_1_1_count,
            'worse_than_fact_inv_1_1_count':
                self.s_worse_than_fact_inv_1_1_count
            }, sys.stdout)


class GetStats2AsJSON(GetStats2):
    """GetStats2 variant whose output is written as JSON."""

    def writeOutput(self):
        self.writeOutputAsJSON()