#!/usr/bin/env python import sys from dbaccess import setDatabase, execQuery, commit from misc import ( textToId, getAllSnapshots, getLastRankingSnapshot, getContext, isValidSHA1, getBMTimeSeriesStatsList) # --- BEGIN global functions ------------------------------------------------ def printUsage(): print ( "usage:" + sys.argv[0] + " --help | ") def printVerboseUsage(): printUsage() print (": The physical machine on which the results were " + "produced (e.g. barbarella or 172.24.90.79).") print (": The OS/compiler/architecture combination " + "(e.g. linux-g++-32).") print (": The product branch (e.g. 'qt 4.6', 'qt 4.7', or " + "'qt master').") print (": The tested revision within the branch. Can be " + "extracted using 'git log -1 --pretty=format:%H' (assuming the " + "tested revision is the current head revision).") print ( ": The database. One of 'bm' or 'bm-dev' (the latter " + "intended for experimentation).") # ### 2 B DOCUMENTED! def printProgress(p, lead): sys.stdout.write(lead + " ... (" + "{0:.2f}".format(p) + " %)\r") sys.stdout.flush() # ### 2 B DOCUMENTED! # NOTE: This function is currently duplicated elsewhere in JavaScipt! def changeMagnitudeScore(change): max_change = 2.0 abs_change = (1.0 / change) if change < 1 else change return (min(abs_change, max_change) - 1.0) / (max_change - 1.0) # ### 2 B DOCUMENTED! # NOTE: This function is currently duplicated elsewhere in JavaScript! def qualityScore(lsd, ni, nz, nc, mdrse): max_bad_snapshots = 30 # experimental; maybe use max durability score? max_sample_size = 5; max_LSD = max_bad_snapshots; max_NI = max_bad_snapshots * max_sample_size; max_NZ = max_bad_snapshots * max_sample_size; max_NC = max_bad_snapshots; lsd_score = min(1, lsd / float(max_LSD)); ni_score = min(1, ni / float(max_NI)); nz_score = min(1, nz / float(max_NZ)); nc_score = min(1, nc / float(max_NC)); mdrse_score = mdrse / 100.0; return (lsd_score + ni_score + nz_score + nc_score + mdrse_score) / 5.0; # Registers the ranking for a given statistic. context1_id and context2_id # refer to the first and last snapshot respectively in the interval used for # computing the rankings. # Assumptions: # - A high value should be ranked above a small one. # - A negative value is undefined and gets an invalid ranking position, i.e. -1. def registerRanking(table, stat_index, stat_name, context1_id, context2_id): table.sort(key=lambda x: x[stat_index], reverse=True) stat_id = textToId("rankingStat", stat_name) assert stat_id >= 0 row_pos = 0 ranking_pos = 0 for row in table: benchmark_id = row[0] metric_id = row[1] lc_timestamp = row[2] stat_value = row[stat_index] # The following statement ensures the following conditions: # - A negative value gets an invalid ranking position, i.e. -1 # - Equal values get the same ranking position. # - The ranking position of benchmark B indicates the number of # benchmarks ranked higher than B (i.e. having a smaller ranking # position). if stat_value < 0: ranking_pos = -1 # Note that the remaining values will now be negative, so updating # row_pos and prev_stat_value is no longer necessary! else: if (row_pos > 0) and (stat_value != prev_stat_value): ranking_pos = row_pos row_pos = row_pos + 1 prev_stat_value = stat_value # Insert or update the corresponding row in the 'ranking' table: query = ( "SELECT merge_ranking(" + str(context1_id) + ", " + str(context2_id) + ", " + str(benchmark_id) + ", " + str(metric_id) + ", " + str(lc_timestamp) + ", " + str(stat_id) + ", " + str(stat_value) + ", " + str(ranking_pos) + ");" ) execQuery(query, False) # ### 2 B DOCUMENTED! def getAllRankingStats(bmstats_list): table = [] for stats in bmstats_list: # NOTE: # - All of the ranking statistics are of type "higher is better" # (a high value is ranked better than a low value). # - Moreover, all present/defined values are non-negative. # - This means that representing absent/undefined values as -1 is ok, # since this ensures lowest ranking. benchmark_id = stats["benchmark_id"] metric_id = stats["metric_id"] lc_timestamp = stats["lc_timestamp"] lsd = stats["lsd"] ni = stats["ni"] nz = stats["nz"] nc = stats["nc"] mdrse = stats["med_of_rses"] rsemd = stats["rse_of_meds"] qs = qualityScore(lsd, ni, nz, nc, mdrse) lc = stats["lc"] if lc >= 0.0: lcgss = stats["lc_gsep_score"] lclss = stats["lc_lsep_score"] lcds1 = stats["lc_dur1_score"] lcds2 = stats["lc_dur2_score"] lcms = changeMagnitudeScore(lc) lcss1 = lcms * lcgss * lclss * lcds1 lcss = lcss1 * lcds2 if lc < 1.0: lcssr = lcss lcss1r = lcss1 lcssi = lcss1i = -1 else: lcssi = lcss lcss1i = lcss1 lcssr = lcss1r = -1 else: lcssr = lcssi = lcss1r = lcss1i = -1 table.append( (benchmark_id, metric_id, lc_timestamp, qs, lcssr, lcssi, lcss1r, lcss1i)) return table # ### 2 B DOCUMENTED! def getFirstUploadTimestamp(snapshots, sha1_id): try: return snapshots[zip(*snapshots)[0].index(sha1_id)][1] except ValueError: return -1 # ### 2 B DOCUMENTED! def updateRankings(host_id, platform_id, branch_id, sha12_id, context2_id): # Get all snapshots matching the host/platform/branch combination: sys.stdout.write("getting snapshots ... ") sys.stdout.flush() snapshots = getAllSnapshots(host_id, platform_id, branch_id) sys.stdout.write("done\n") sys.stdout.flush() # Rankings will normally be computed once a week for each # host/platform/branch combination (note the tradeoff between update # frequency and database size): ranking_interval = 3600 * 24 * 7 # secs in a week # Rankings will be updated if at least one of the following # conditions eventually becomes True: force_cond = empty_cond = interval_cond = False force_ranking = False force_ranking = True # Uncomment for testing force_cond = force_ranking if not force_cond: last_ranking_sha1_id, last_ranking_timestamp = getLastRankingSnapshot( host_id, platform_id, branch_id) empty_cond = last_ranking_sha1_id < 0 if not empty_cond: assert last_ranking_timestamp >= 0 target_timestamp = getFirstUploadTimestamp(snapshots, sha12_id) if target_timestamp < 0: print ( "failed to extract target_timestamp (error in command-line " + "args?)") sys.exit(1) interval_cond = ( (target_timestamp - last_ranking_timestamp) > ranking_interval) if not (force_cond or empty_cond or interval_cond): print ( "not updating rankings ('force', 'empty', and 'interval' " + "conditions all failed)") return print ( "updating rankings ('force' cond.: " + str(force_cond) + "; 'empty' cond.: " + str(empty_cond) + "; 'interval' cond.: " + str(interval_cond) + ") ...") # For simplicity we hardcode the tolerances for now: difftol = 1.1 durtolmin = 3 durtolmax = 30 # Determine the target snapshot range: # (The range should end at the snapshot given on the command-line and begin # at the snapshot that is 2 * durtolmax snapshots back in time, or, if no # such snapshot exists, the first available snapshot.) try: sha12_pos = zip(*snapshots)[0].index(sha12_id) except ValueError: print "no observations found for SHA-1 ID:", sha12_id sys.exit(1) sha11_pos = max(0, (sha12_pos - 2 * durtolmax) + 1) snapshots = snapshots[sha11_pos:(sha12_pos + 1)] if len(snapshots) < 2: print ( "no observations found before SHA-1 ID: " + sha12_id + " (computing rankings makes no sense)") sys.exit(1) # Get time series statistics for all benchmarks: bmstats_list = getBMTimeSeriesStatsList( host_id, platform_id, branch_id, snapshots, None, difftol, durtolmin, durtolmax, printProgress, "getting time series statistics") sys.stdout.write("\n") # *** Compute rankings ************************************************** # Step 1: Create a table containing all ranking statistics (one row per # benchmark/metric): sys.stdout.write("creating table for all ranking stats ... ") sys.stdout.flush() table = getAllRankingStats(bmstats_list) sys.stdout.write("done\n") sys.stdout.flush() # Step 2: Sort the table individually for each ranking statistic and # register the ranking positions in the database: context1_id = getContext(host_id, platform_id, branch_id, snapshots[0][0]) if context1_id == -1: print "fatal error: failed to find context for start snapshot" sys.exit(1) nameToIndex = { "QS": 3, "LCSSR": 4, "LCSSI": 5, "LCSS1R": 6, "LCSS1I": 7 } for name in nameToIndex: sys.stdout.write("registering ranking for " + name + " ...\r") sys.stdout.flush() registerRanking( table, nameToIndex[name], name, context1_id, context2_id) sys.stdout.write("\n") # --- END global functions ------------------------------------------------ # --- BEGIN main program ------------------------------------------------ if ((len(sys.argv) > 1) and (sys.argv[1] == "--help")): printVerboseUsage() sys.exit(1) if (len(sys.argv) != 6): printUsage() sys.exit(1) host = sys.argv[1] platform = sys.argv[2] branch = sys.argv[3] sha12 = sys.argv[4] if (not isValidSHA1(sha12)): print "invalid SHA-1:", sha12 sys.exit(1) db = sys.argv[5] setDatabase(db) host_id = textToId("host", host) if host_id == -1: print "no such host:", host sys.exit(1) platform_id = textToId("platform", platform) if platform_id == -1: print "no such platform:", platform sys.exit(1) branch_id = textToId("branch", branch) if branch_id == -1: print "no such branch:", branch sys.exit(1) sha12_id = textToId("sha1", sha12) if sha12_id == -1: print "no such SHA-1:", sha12 sys.exit(1) context2_id = getContext(host_id, platform_id, branch_id, sha12_id) if context2_id == -1: print "no results found for this host/platform/branch/SHA-1 combination" sys.exit(1) updateRankings(host_id, platform_id, branch_id, sha12_id, context2_id) # Write to database: commit() print "finalization done" # --- END main program ------------------------------------------------