1 files changed, 365 insertions, 0 deletions
diff --git a/scripts/computerankings.py b/scripts/computerankings.py
new file mode 100755
index 0000000..6726806
--- /dev/null
+++ b/scripts/computerankings.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python
+
+import sys
+from dbaccess import setDatabase, execQuery, commit
+from misc import (
+    getOptions, textToId, getAllSnapshots, getLastRankingSnapshot, getContext,
+    isValidSHA1, getBMTimeSeriesStatsList)
+
+
+# --- BEGIN Global functions ----------------------------------------------
+
+def printUsage():
+    print (
+        "usage:" + sys.argv[0] +
+        " --help | [--dbhost H] [--dbport P] --db D --host H --platform P "
+        "--branch B --sha1 S [--noprogress NP]")
+
+def printVerboseUsage():
+    printUsage()
+    print "\noptions:"
+    print(
+        "    --help: This help.")
+    print(
+        "  --dbhost: The database server host (overriding the default).")
+    print(
+        "  --dbport: The database server port (overriding the default).")
+    print(
+        "      --db: The database. One of 'bm' or 'bm-dev' (the latter "
+        "intended for experimentation).")
+    print(
+        "    --host: The physical machine on which the results were "
+        "produced (e.g. barbarella or 172.24.90.79).")
+    print(
+        "--platform: The OS/compiler/architecture combination "
+        "(e.g. linux-g++-32).")
+    print(
+        "  --branch: The product branch (e.g. 'qt 4.6', 'qt 4.7', or "
+        "'qt master').")
+    print(
+        "    --sha1: The tested revision within the branch. Can be "
+        "extracted using 'git log -1 --pretty=format:%H' (assuming the "
+        "tested revision is the current head revision).")
+    print(
+        "  --noprogress: Specify \'true\' to disable progress indicator.")
+
+
+# ### 2 B DOCUMENTED!
+def printProgress(p, lead):
+    sys.stdout.write(lead + " ... (" + "{0:.2f}".format(p) + " %)\r")
+    sys.stdout.flush()
+
+
+# ### 2 B DOCUMENTED!
+# NOTE: This function is currently duplicated elsewhere in JavaScipt!
+def changeMagnitudeScore(change):
+    max_change = 2.0
+    abs_change = (1.0 / change) if change < 1 else change
+    return (min(abs_change, max_change) - 1.0) / (max_change - 1.0)
+
+
+# ### 2 B DOCUMENTED!
+# NOTE: This function is currently duplicated elsewhere in JavaScript!
+def qualityScore(lsd, ni, nz, nc, mdrse):
+    max_bad_snapshots = 30 # experimental; maybe use max durability score?
+    max_sample_size = 5;
+    max_LSD = max_bad_snapshots;
+    max_NI = max_bad_snapshots * max_sample_size;
+    max_NZ = max_bad_snapshots * max_sample_size;
+    max_NC = max_bad_snapshots;
+
+    lsd_score = 0 if (lsd == -1) else min(1, lsd / float(max_LSD));
+    ni_score = min(1, ni / float(max_NI));
+    nz_score = min(1, nz / float(max_NZ));
+    nc_score = min(1, nc / float(max_NC));
+    mdrse_score = 0 if (mdrse == -1) else (mdrse / 100.0);
+
+    return (lsd_score + ni_score + nz_score + nc_score + mdrse_score) / 5.0;
+
+
+# Registers the ranking for a given statistic. context1_id and context2_id
+# refer to the first and last snapshot respectively in the interval used for
+# computing the rankings.
+# Assumptions:
+# - A high value should be ranked above a small one.
+# - A negative value is undefined and gets an invalid ranking position, i.e. -1.
+def registerRanking(table, stat_index, stat_name, context1_id, context2_id):
+
+    table.sort(key=lambda x: x[stat_index], reverse=True)
+
+    stat_id = textToId("rankingStat", stat_name)
+    assert stat_id >= 0
+
+    row_pos = 0
+    ranking_pos = 0
+    for row in table:
+        benchmark_id = row[0]
+        metric_id = row[1]
+        lc_timestamp = row[2]
+        stat_value = row[stat_index]
+
+        # The following statement ensures the following conditions:
+        # - A negative value gets an invalid ranking position, i.e. -1
+        # - Equal values get the same ranking position.
+        # - The ranking position of benchmark B indicates the number of
+        #   benchmarks ranked higher than B (i.e. having a smaller ranking
+        #   position).
+        if stat_value < 0:
+            ranking_pos = -1
+            # Note that the remaining values will now be negative, so updating
+            # row_pos and prev_stat_value is no longer necessary!
+        else:
+            if (row_pos > 0) and (stat_value != prev_stat_value):
+                ranking_pos = row_pos
+            row_pos = row_pos + 1
+            prev_stat_value = stat_value
+
+        # Insert or update the corresponding row in the 'ranking' table:
+        query = (
+            "SELECT merge_ranking("
+            + str(context1_id)
+            + ", " + str(context2_id)
+            + ", " + str(benchmark_id)
+            + ", " + str(metric_id)
+            + ", " + str(lc_timestamp)
+            + ", " + str(stat_id)
+            + ", " + str(stat_value)
+            + ", " + str(ranking_pos)
+            + ");"
+            )
+        execQuery(query, False)
+
+
+# ### 2 B DOCUMENTED!
+def getAllRankingStats(bmstats_list):
+    table = []
+    for stats in bmstats_list:
+
+        # NOTE:
+        # - All of the ranking statistics are of type "higher is better"
+        #   (a high value is ranked better than a low value).
+        # - Moreover, all present/defined values are non-negative.
+        # - This means that representing absent/undefined values as -1 is ok,
+        #   since this ensures lowest ranking.
+
+        benchmark_id = stats["benchmark_id"]
+        metric_id = stats["metric_id"]
+        lc_timestamp = stats["lc_timestamp"]
+        lsd = stats["lsd"]
+        ni = stats["ni"]
+        nz = stats["nz"]
+        nc = stats["nc"]
+        mdrse = stats["med_of_rses"]
+        rsemd = stats["rse_of_meds"]
+
+        qs = qualityScore(lsd, ni, nz, nc, mdrse)
+
+        lc = stats["lc"]
+        if lc >= 0.0:
+            lcgss = stats["lc_gsep_score"]
+            lclss = stats["lc_lsep_score"]
+            lcds1 = stats["lc_dur1_score"]
+            lcds2 = stats["lc_dur2_score"]
+            lcms = changeMagnitudeScore(lc)
+            lcss1 = (lcms + lcgss + lclss + lcds1) / 4.0
+            lcss = (lcms + lcgss + lclss + lcds1 + lcds2) / 5.0
+            if lc < 1.0:
+                lcssr = lcss
+                lcss1r = lcss1
+                lcssi = lcss1i = -1
+            else:
+                lcssi = lcss
+                lcss1i = lcss1
+                lcssr = lcss1r = -1
+        else:
+            lcssr = lcssi = lcss1r = lcss1i = -1
+
+        table.append(
+            (benchmark_id, metric_id, lc_timestamp, qs, lcssr, lcssi, lcss1r,
+             lcss1i))
+
+    return table
+
+
+# ### 2 B DOCUMENTED!
+def getFirstUploadTimestamp(snapshots, sha1_id):
+    try:
+        return snapshots[zip(*snapshots)[0].index(sha1_id)][1]
+    except ValueError:
+        return -1
+
+
+# ### 2 B DOCUMENTED!
+def updateRankings(
+    host_id, platform_id, branch_id, sha12_id, context2_id, no_progress):
+
+    # Get all snapshots matching the host/platform/branch combination:
+    sys.stdout.write("getting snapshots ... ")
+    sys.stdout.flush()
+    snapshots = getAllSnapshots(host_id, platform_id, branch_id)
+    sys.stdout.write("done\n")
+    sys.stdout.flush()
+
+
+    # Rankings will normally be computed once a day for each
+    # host/platform/branch combination (note the tradeoff between update
+    # frequency and database size):
+    ranking_interval = 3600 * 24 # secs in a day
+
+    # Rankings will be updated if at least one of the following
+    # conditions eventually becomes True:
+    force_cond = empty_cond = interval_cond = False
+
+    force_ranking = False
+    #force_ranking = True # Uncomment for testing
+
+    force_cond = force_ranking
+
+    if not force_cond:
+        last_ranking_sha1_id, last_ranking_timestamp = getLastRankingSnapshot(
+            host_id, platform_id, branch_id)
+        empty_cond = last_ranking_sha1_id < 0
+        if not empty_cond:
+            assert last_ranking_timestamp >= 0
+
+            target_timestamp = getFirstUploadTimestamp(snapshots, sha12_id)
+            if target_timestamp < 0:
+                print (
+                "error: failed to extract target_timestamp "
+                "(error in command-line args?)")
+                sys.exit(1)
+
+            interval_cond = (
+                (target_timestamp - last_ranking_timestamp) > ranking_interval)
+
+    if not (force_cond or empty_cond or interval_cond):
+        print (
+            "not updating rankings ('force', 'empty', and 'interval' "
+            "conditions all failed)")
+        return
+
+    print (
+        "updating rankings ('force' cond.: " + str(force_cond) +
+        "; 'empty' cond.: " + str(empty_cond) +
+        "; 'interval' cond.: " + str(interval_cond) + ") ...")
+
+    # For simplicity we hardcode the tolerances for now:
+    difftol = 1.1
+    durtolmin = 3
+    durtolmax = 30
+
+    # Determine the target snapshot range:
+    # (The range should end at the snapshot given on the command-line and begin
+    # at the snapshot that is 2 * durtolmax snapshots back in time, or, if no
+    # such snapshot exists, the first available snapshot.)
+    try:
+        sha12_pos = zip(*snapshots)[0].index(sha12_id)
+    except ValueError:
+        print "no observations found for SHA-1 ID:", sha12_id
+        sys.exit(1)
+    sha11_pos = max(0, (sha12_pos - 2 * durtolmax) + 1)
+    snapshots = snapshots[sha11_pos:(sha12_pos + 1)]
+    if len(snapshots) < 2:
+        print (
+            "no observations found before SHA-1 ID: " + sha12_id +
+            " (computing rankings makes no sense)")
+        sys.exit(1)
+
+    # Get time series statistics for all benchmarks:
+    if no_progress:
+        sys.stdout.write("getting time series statistics ... ")
+    bmstats_list = getBMTimeSeriesStatsList(
+        host_id, platform_id, branch_id, snapshots, None, difftol, durtolmin,
+        durtolmax, None if no_progress else printProgress,
+        "getting time series statistics")
+
+    if no_progress:
+        sys.stdout.write("done\n")
+    else:
+        sys.stdout.write("\n")
+
+
+    # *** Compute rankings **************************************************
+
+    # Step 1: Create a table containing all ranking statistics (one row per
+    #         benchmark/metric):
+    sys.stdout.write("creating table for all ranking stats ... ")
+    sys.stdout.flush()
+    table = getAllRankingStats(bmstats_list)
+    sys.stdout.write("done\n")
+    sys.stdout.flush()
+
+    # Step 2: Sort the table individually for each ranking statistic and
+    # register the ranking positions in the database:
+    context1_id = getContext(host_id, platform_id, branch_id, snapshots[0][0])
+    if context1_id == -1:
+        print "error: failed to find context for start snapshot"
+        sys.exit(1)
+    nameToIndex = { "QS": 3, "LCSSR": 4, "LCSSI": 5, "LCSS1R": 6, "LCSS1I": 7 }
+    for name in nameToIndex:
+        sys.stdout.write("registering ranking for " + name + " ... ")
+        sys.stdout.flush()
+        registerRanking(
+            table, nameToIndex[name], name, context1_id, context2_id)
+        sys.stdout.write("done\n")
+        sys.stdout.flush()
+
+# --- END Global functions ----------------------------------------------
+
+
+# --- BEGIN Main program ----------------------------------------------
+
+options, http_get = getOptions()
+
+if "help" in options:
+    printVerboseUsage()
+    sys.exit(0)
+
+if (not ("db" in options and "host" in options and "platform" in options and
+    "branch" in options and "sha1" in options)):
+    printUsage()
+    sys.exit(0)
+
+if not isValidSHA1(options["sha1"]):
+    print "error: invalid SHA-1:", options["sha1"]
+    sys.exit(1)
+
+setDatabase(
+    options["dbhost"] if "dbhost" in options else None,
+    options["dbport"] if "dbport" in options else None,
+    options["db"])
+
+host_id = textToId("host", options["host"])
+if host_id == -1:
+    print "error: no such host:", options["host"]
+    sys.exit(1)
+platform_id = textToId("platform", options["platform"])
+if platform_id == -1:
+    print "error: no such platform:", options["platform"]
+    sys.exit(1)
+branch_id = textToId("branch", options["branch"])
+if branch_id == -1:
+    print "error: no such branch:", options["branch"]
+    sys.exit(1)
+sha12_id = textToId("sha1", options["sha1"])
+if sha12_id == -1:
+    print "error: no such SHA-1:", options["sha1"]
+    sys.exit(1)
+
+context2_id = getContext(host_id, platform_id, branch_id, sha12_id)
+if context2_id == -1:
+    print "error: no results found for this context"
+    sys.exit(1)
+
+updateRankings(
+    host_id, platform_id, branch_id, sha12_id, context2_id,
+    ("noprogress" in options) and (
+        (options["noprogress"] == "1")
+        or (options["noprogress"].lower() == "true")))
+
+# Write to database:
+commit()
+
+print "rankings computation done"
+
+# --- END Main program ----------------------------------------------