scripts/getrankings.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

import sys
import json
from dbaccess import execQuery, database
from misc import (
    textToId, idToText, getContext, getTimestampFromContext, getSnapshots,
    getRankingContexts, benchmarkToComponents, printJSONHeader)

# Collects the rankings stored for one host/platform/branch/SHA-1 context,
# together with the SHA-1s, snapshots, benchmarks and metrics needed to
# present them.
#
# NOTE: execute() finishes by calling self.writeOutput(), which this class
# does not define; a subclass has to supply it.
class GetRankings:

    # Resolves the textual host, platform, branch and SHA-1 to their database
    # IDs and looks up the corresponding context ID.
    #
    # test_case_filter is either None (keep every test case) or a container
    # of test case names to keep. A negative maxsize means "no limit" on the
    # number of rows kept per statistic.
    # NOTE(review): 'sha12' is passed to textToId('sha1', ...) unchanged;
    # presumably the name distinguishes it from a 'sha11' used elsewhere --
    # confirm against the callers.
    def __init__(
        self, host, platform, branch, sha12, test_case_filter, maxsize):
        self.host = host
        self.host_id = textToId('host', self.host)
        self.platform = platform
        self.platform_id = textToId('platform', self.platform)
        self.branch = branch
        self.branch_id = textToId('branch', self.branch)
        self.context2_id = getContext(
            self.host_id, self.platform_id, self.branch_id,
            textToId('sha1', sha12))
        self.test_case_filter = test_case_filter
        self.maxsize = maxsize


    # Returns -1, 0, and 1 if ranking position x is considered less than,
    # equal to, and greater than ranking position y respectively.
    # Note: a negative ranking position is considered worse (i.e. effectively
    # treated as having an "infinite" ranking position) than any non-negative
    # ranking position. Two negative positions compare as equal, so that the
    # comparison is consistent regardless of argument order.
    def cmp_rank_pos(self, x, y):
        x_unranked = x < 0
        y_unranked = y < 0
        if x_unranked or y_unranked:
            if x_unranked and y_unranked:
                return 0
            return 1 if x_unranked else -1
        if x < y:
            return -1
        if x > y:
            return 1
        return 0


    # Returns a sort key that orders ranking positions the same way as
    # cmp_rank_pos(): non-negative positions ascending, followed by all
    # negative positions (which share a single key, so a stable sort keeps
    # them in their incoming order).
    def _rank_sort_key(self, pos):
        if pos < 0:
            return (1, 0)
        return (0, pos)


    # Returns the ID of the context that follows the current one in the list
    # returned by getRankingContexts(), or -1 if there is none.
    # -1 is also returned if the current context does not appear in that list
    # at all (including when the list is empty); no deltas are computed then.
    def _getPreviousRankingContext(self):
        ranking_contexts = getRankingContexts(
            self.host_id, self.platform_id, self.branch_id)
        ranking_context_ids = [entry[0] for entry in ranking_contexts]
        try:
            curr_index = ranking_context_ids.index(self.context2_id)
        except ValueError:
            return -1 # Current context has no rankings of its own
        if curr_index + 1 < len(ranking_context_ids):
            return ranking_context_ids[curr_index + 1]
        return -1 # No rankings before this context


    # Returns all time series notes for the host/platform/branch as a
    # dictionary that maps (benchmark ID, metric ID) to the note.
    def _getNotes(self):
        qres = execQuery(
            "SELECT benchmarkId, metricId, note FROM timeSeriesAnnotation"
            " WHERE hostId = %s AND platformId = %s AND branchId = %s",
            (self.host_id, self.platform_id, self.branch_id))
        return dict(
            ((benchmark_id, metric_id), note)
            for benchmark_id, metric_id, note in qres)


    # Returns the unsorted ranking rows of the current context for the given
    # statistic, restricted to the test case filter (if any). Each row is a
    # tuple (benchmark ID, metric ID, context1 ID, pos, value,
    # last change timestamp, note), where note is "" if none exists.
    def _getFilteredRanking(self, stat_id, notes):
        ranking_all = execQuery(
            "SELECT benchmarkId, metricId, context1Id, pos, value,"
            " lastChangeTimestamp"
            " FROM ranking"
            " WHERE context2Id = %s"
            " AND statId = %s",
            (self.context2_id, stat_id))

        ranking = []
        for row in ranking_all:
            benchmark_id = row[0]
            metric_id = row[1]

            # The benchmark name only needs to be resolved when there is a
            # filter to match it against:
            if self.test_case_filter is not None:
                benchmark = idToText("benchmark", benchmark_id)
                test_case, test_function, data_tag = (
                    benchmarkToComponents(benchmark))
                if test_case not in self.test_case_filter:
                    continue

            ranking.append((
                    benchmark_id, metric_id, row[2], row[3], row[4], row[5],
                    notes.get((benchmark_id, metric_id), "")))

        return ranking


    # Returns a copy of ranking in which each row is a list, extended with
    # the change in position (previous pos - current pos) relative to the
    # ranking of context2_prev_id wherever both positions are non-negative.
    # Rows without a usable previous position get no extra element.
    def _appendDeltas(self, ranking, context2_prev_id, stat_id):
        ranking_prev_list = execQuery(
            "SELECT benchmarkId, metricId, pos"
            " FROM ranking"
            " WHERE context2Id = %s"
            " AND statId = %s",
            (context2_prev_id, stat_id))
        ranking_prev = {}
        for benchmark_id, metric_id, pos in ranking_prev_list:
            ranking_prev[benchmark_id, metric_id] = pos

        ranking_with_deltas = []
        for entry in ranking:
            row = list(entry)
            pos = row[3]
            if pos >= 0:
                pos_prev = ranking_prev.get((row[0], row[1]))
                if pos_prev is not None and pos_prev >= 0:
                    row.append(pos_prev - pos)
            ranking_with_deltas.append(row)

        return ranking_with_deltas


    # Gets all rankings matching the context/metric combination.
    #
    # Returns (sha1_infos, rankings), where sha1_infos holds one
    # (context ID, SHA-1 ID, SHA-1) row for each context referred to by the
    # rankings (and for the current context), and rankings maps each
    # lower-cased statistic name to its sorted, possibly truncated list of
    # rows. Rows are lists (with an optional trailing delta) if a previous
    # ranking exists, and plain tuples otherwise.
    #
    # Writes an error message to stdout and exits with status 1 if the
    # context is invalid.
    def getRankings(self):

        if self.context2_id < 0:
            sys.stdout.write("error: invalid context\n")
            sys.exit(1)

        # Find the previous context (if any) for which rankings exist:
        context2_prev_id = self._getPreviousRankingContext()

        # Get all time series notes:
        notes = self._getNotes()

        rankings = {}
        context_ids = set([self.context2_id]) # Affected context IDs

        # Get rankings for each statistic:
        stat_infos = execQuery("SELECT id, value FROM rankingStat", ())
        for stat_id, stat_name in stat_infos:

            ranking = self._getFilteredRanking(stat_id, notes)

            # Register the contexts of all matching rows (i.e. also of those
            # that are about to be cut off by 'maxsize'):
            context_ids.update(row[2] for row in ranking)

            # Sort the table in ascending order on the 'pos' column, but
            # so that negative positions are ranked below any other positions:
            ranking.sort(key=lambda row: self._rank_sort_key(row[3]))

            # Keep only the 'maxsize' highest ranked benchmarks:
            if self.maxsize >= 0:
                ranking = ranking[:self.maxsize]

            # Compute deltas from previous ranking:
            if context2_prev_id >= 0:
                ranking = self._appendDeltas(
                    ranking, context2_prev_id, stat_id)

            # Add to main list:
            rankings[stat_name.lower()] = ranking

        # Extract affected SHA-1s (context_ids always contains at least the
        # current context, so the IN list is never empty):
        placeholders = ", ".join(["%s"] * len(context_ids))
        sha1_infos = execQuery(
            "SELECT context.id, sha1Id, sha1.value"
            " FROM context, sha1"
            " WHERE context.id IN (" + placeholders + ")"
            " AND sha1Id = sha1.id",
            tuple(context_ids))

        return sha1_infos, rankings


    # Extracts the individual snapshots in the maximum range spanned by
    # the SHA-1s in sha1_infos, i.e. from the SHA-1 of the context with the
    # lowest timestamp to the SHA-1 of the context with the highest one.
    # NOTE(review): for an empty sha1_infos, getSnapshots() is called with
    # None for both SHA-1 IDs -- confirm that it copes with that.
    def getSnapshotsInMaxRange(self, sha1_infos):

        min_timestamp = max_timestamp = first_sha1_id = last_sha1_id = None
        for context_id, sha1_id, sha1 in sha1_infos:
            timestamp = getTimestampFromContext(context_id)
            if min_timestamp is None:
                # First row: it is both the oldest and the newest so far.
                min_timestamp = max_timestamp = timestamp
                first_sha1_id = last_sha1_id = sha1_id
            elif timestamp < min_timestamp:
                min_timestamp = timestamp
                first_sha1_id = sha1_id
            elif timestamp > max_timestamp:
                max_timestamp = timestamp
                last_sha1_id = sha1_id

        snapshots = getSnapshots(
            self.host_id, self.platform_id, self.branch_id, first_sha1_id,
            last_sha1_id)

        return snapshots


    # Gathers all data (rankings, snapshots, benchmarks and metrics) into
    # attributes of this object and hands over to writeOutput(), which has
    # to be provided by a subclass.
    def execute(self):
        self.sha1_infos, self.rankings = self.getRankings()
        self.snapshots = self.getSnapshotsInMaxRange(self.sha1_infos)

        self.benchmarks = execQuery("SELECT id, value FROM benchmark", ())
        self.metrics = execQuery("SELECT id, value FROM metric", ())

        self.writeOutput()


    # Writes the data gathered by execute() to stdout as a single JSON
    # object, preceded by whatever printJSONHeader() emits.
    def writeOutputAsJSON(self):
        printJSONHeader()

        # Replace the SHA-1 ID of each snapshot by the SHA-1 itself:
        snapshots = [
            (idToText("sha1", snapshot[0]), snapshot[1])
            for snapshot in self.snapshots]

        json.dump({
                'database': database(),
                'host': self.host,
                'platform': self.platform,
                'branch': self.branch,
                'benchmarks': self.benchmarks,
                'metrics': self.metrics,
                'snapshots': snapshots,
                'rankings': self.rankings
                }, sys.stdout)


# GetRankings variant that supplies the writeOutput() method called by
# GetRankings.execute(), emitting the gathered data as JSON.
class GetRankingsAsJSON(GetRankings):
    # Delegates to the inherited writeOutputAsJSON().
    def writeOutput(self):
        self.writeOutputAsJSON()