summaryrefslogtreecommitdiffstats
path: root/chromium/build/toolchain/win/midl.py
blob: 6d6fab05a50edc738b24801c8ee0651ad254cf7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import division
from __future__ import print_function

import array
import difflib
import distutils.dir_util
import filecmp
import io
import operator
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import uuid

from functools import reduce


def ZapTimestamp(filename):
  """Rewrites one midl.exe output file in place so its bytes are reproducible.

  Files whose name ends in '.tlb' get the "Created by MIDL version ..." string
  and the MIDL compiler version in their custom() data replaced with fixed
  values.  All other files are treated as generated source: the version /
  timestamp banner is redacted and two known differences between midl.exe
  versions are papered over.

  Args:
    filename: Path of the file to rewrite.

  Raises:
    AssertionError: If a '.tlb' file does not have the expected layout.
  """
  with open(filename, 'rb') as f:
    contents = f.read()
  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
  # outputs, but using the local timezone.  To make the output timezone-
  # independent, replace that date with a fixed string of the same length.
  # Also blank out the minor version number.
  if filename.endswith('.tlb'):
    # See https://chromium-review.googlesource.com/c/chromium/src/+/693223 for
    # a fairly complete description of the .tlb binary format.
    # TLB files start with a 0x54 byte header. Offset 0x20 stores how many
    # types are defined in the file, and the header is followed by that many
    # uint32s.
    # After that, 15 section headers appear.  Each section header is 16 bytes,
    # starting with offset and length uint32s.
    # Section 12 in the file contains custom() data. custom() data has a type
    # (int, string, etc).  Each custom data chunk starts with a uint16_t
    # describing its type.  Type 8 is string data, consisting of a uint32_t
    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
    # 4 byte boundary.  Type 0x13 is uint32 data, followed by 4 data bytes,
    # followed by two 'W' to pad to a 4 byte boundary.
    # The custom block always starts with one string containing "Created by
    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
    # followed by another uint32 containing the MIDL compiler version (e.g.
    # 0x0801026e for v8.1.622 -- 0x26e == 622).  These 3 fields take 0x54 bytes.
    # There might be more custom data after that, but these 3 blocks are always
    # there for file-level metadata.
    # All data is little-endian in the file.
    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    ntypes, = struct.unpack_from('<I', contents, 0x20)
    # Header of section 12 (index 11): skip the file header, the per-type
    # uint32 table and the 11 preceding section headers.
    custom_off, custom_len = struct.unpack_from(
        '<II', contents, 0x54 + 4*ntypes + 11*16)
    assert custom_len >= 0x54
    # First: Type string (0x8), followed by 0x3e characters.
    string_off = custom_off + 6
    assert contents[custom_off:string_off] == b'\x08\x00\x3e\x00\x00\x00'
    assert re.match(
        br'Created by MIDL version 8\.\d\d\.\d{4} at ... Jan 1. ..:..:.. 2038\n',
        contents[string_off:string_off + 0x3e])
    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
    stamp_off = string_off + 0x3e
    assert contents[stamp_off:stamp_off + 8] == \
        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
    # Third: Type uint32 (0x13) storing MIDL compiler version.
    version_off = stamp_off + 8
    assert contents[version_off:version_off + 2] == b'\x13\x00'
    # Replace "Created by" string with fixed string, and fixed MIDL version with
    # 8.1.622 always.  The replacement has the same length as the original
    # three fields (0x54 bytes including the 6 byte string prefix), so no
    # offsets elsewhere in the file need to change.
    contents = (
        contents[0:string_off] +
        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
        contents[custom_off + 0x54:])
  else:
    contents = re.sub(
        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
        br'/\* at ... Jan 1. ..:..:.. 2038',
        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
        br'/* at a redacted point in time', contents)
    contents = re.sub(
        br'    Oicf, W1, Zp8, env=(.....) \(32b run\), '
        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
        br'    Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
        contents)
    # TODO(thakis): If we need more hacks than these, try to verify checked-in
    # outputs when we're using the hermetic toolchain.
    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
                                b'#endif // !_MIDL_USE_GUIDDEF_')
    # midl.exe puts the midl version into code in one place.  To have
    # predictable output, lie about the midl version if it's not 8.1.622.
    # This is unfortunate, but remember that there's beauty too in imperfection.
    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
                                b'0x801026e, /* MIDL Version 8.1.622 */')
  with open(filename, 'wb') as f:
    f.write(contents)


def overwrite_cls_guid_h(h_file, dynamic_guid):
  """Replaces the GUID of every `class DECLSPEC_UUID("...")` in a header.

  The file is rewritten in place.

  Args:
    h_file: Path of the generated .h file to patch.
    dynamic_guid: Object whose str() is the GUID text to insert (main() passes
      a uuid.UUID).
  """
  with open(h_file, 'rb') as f:
    contents = f.read()
  contents = re.sub(br'class DECLSPEC_UUID\("[^"]*"\)',
                    br'class DECLSPEC_UUID("%s")' % str(dynamic_guid).encode(),
                    contents)
  with open(h_file, 'wb') as f:
    f.write(contents)


def overwrite_cls_guid_iid(iid_file, dynamic_guid):
  """Replaces the GUID in every `MIDL_DEFINE_GUID(CLSID, name, ...)` entry.

  The file is rewritten in place.  Only the GUID initializer values are
  replaced; the identifier that follows `CLSID, ` is kept.

  Args:
    iid_file: Path of the generated _i.c file to patch.
    dynamic_guid: uuid.UUID providing the new GUID value.
  """
  with open(iid_file, 'rb') as f:
    contents = f.read()

  hexuuid = '0x%08x,0x%04x,0x%04x,' % dynamic_guid.fields[0:3]
  # dynamic_guid.bytes is a bytestring in Py3, but a normal string in Py2.
  # Iterating a bytearray yields ints on both, so no version check is needed.
  hexuuid += ','.join(
      '0x%02x' % b for b in bytearray(dynamic_guid.bytes[8:]))

  contents = re.sub(br'MIDL_DEFINE_GUID\(CLSID, ([^,]*),[^)]*\)',
                    br'MIDL_DEFINE_GUID(CLSID, \1,%s)' % hexuuid.encode(),
                    contents)
  with open(iid_file, 'wb') as f:
    f.write(contents)


def overwrite_cls_guid_tlb(tlb_file, dynamic_guid):
  """Patches the coclass GUID inside a .tlb file and rebuilds its GUID hash.

  The file is rewritten in place and keeps its size.

  Args:
    tlb_file: Path of the .tlb file to patch.
    dynamic_guid: uuid.UUID providing the new GUID value.

  Raises:
    AssertionError: If the file does not start with the expected magic, its
      first type is not a coclass, or the coclass GUID does not fit in the
      GUID section.
  """
  # See ZapTimestamp() for a short overview of the .tlb format.  The 1st
  # section contains type descriptions, and the first type should be our
  # coclass.  It points to the type's GUID in section 6, the GUID section.
  with open(tlb_file, 'rb') as f:
    contents = f.read()
  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
  ntypes, = struct.unpack_from('<I', contents, 0x20)
  # The 16 byte section headers follow the 0x54 byte file header and the
  # per-type uint32 table; each starts with offset and length uint32s.
  sections_off = 0x54 + 4*ntypes
  type_off, _ = struct.unpack_from('<II', contents, sections_off)

  # Reading the byte through struct gives an int on both Py2 and Py3.
  coclass, = struct.unpack_from('<B', contents, type_off)
  assert coclass == 0x25, "expected coclass"

  guidind, = struct.unpack_from('<I', contents, type_off + 0x2c)
  guid_off, guid_len = struct.unpack_from('<II', contents, sections_off + 5*16)
  # The GUID written below is 16 bytes (4 + 2 + 2 + 8); it has to fit.
  assert guidind + 16 <= guid_len
  contents = array.array('B', contents)
  struct.pack_into('<IHH8s', contents, guid_off + guidind,
                   *(dynamic_guid.fields[0:3] + (dynamic_guid.bytes[8:],)))
  # The GUID is correct now, but there's also a GUID hashtable in section 5.
  # Need to recreate that too.  Since the hash table uses chaining, it's
  # easiest to recompute it from scratch rather than trying to patch it up.
  nbuckets = 0x80 // 4
  hashtab = [0xffffffff] * nbuckets
  # Each GUID entry is 24 bytes: the 16 byte GUID, a uint32 that is left
  # alone here, and at offset 0x14 the uint32 link to the next entry in the
  # same bucket.
  for entry_off in range(guid_off, guid_off + guid_len, 24):
    guidbytes, = struct.unpack_from('<16s', contents, entry_off)
    words = struct.unpack('<8H', guidbytes)
    # midl seems to use the following simple hash function for GUIDs:
    guidhash = reduce(operator.xor, words) % nbuckets
    # Push this entry onto the front of its bucket's chain.
    struct.pack_into('<I', contents, entry_off + 0x14, hashtab[guidhash])
    hashtab[guidhash] = entry_off - guid_off
  hash_off, _ = struct.unpack_from('<II', contents, sections_off + 4*16)
  for i, hashval in enumerate(hashtab):
    struct.pack_into('<I', contents, hash_off + 4*i, hashval)
  with open(tlb_file, 'wb') as f:
    f.write(contents)


def overwrite_cls_guid(h_file, iid_file, tlb_file, dynamic_guid):
  """Writes dynamic_guid into the .h, _i.c and .tlb outputs, in that order.

  This currently assumes that there's only one coclass in the idl file, and
  that that's the type with the dynamic type.

  Args:
    h_file: Path of the generated header.
    iid_file: Path of the generated _i.c file.
    tlb_file: Path of the generated type library.
    dynamic_guid: The GUID to write, forwarded unchanged to each patcher.
  """
  patchers = (
      (overwrite_cls_guid_h, h_file),
      (overwrite_cls_guid_iid, iid_file),
      (overwrite_cls_guid_tlb, tlb_file),
  )
  for patch, path in patchers:
    patch(path, dynamic_guid)


def main(arch, gendir, outdir, dynamic_guid, tlb, h, dlldata, iid, proxy, idl,
         *flags):
  """Copies checked-in midl outputs to outdir and, on Windows, verifies them.

  Args:
    arch: Path of a file holding the environment block to run midl.exe with.
      The second '.'-separated component of this string selects the
      per-architecture ('x86' or 'x64') subdirectory of the checked-in
      outputs.
    gendir: Directory holding the checked-in outputs.  If it contains a
      directory named like the basename of `idl`, that directory is used.
    outdir: Directory the checked-in outputs are copied to.
    dynamic_guid: 'none', or a GUID string (parsed with uuid.UUID) to write
      into the copied .h, _i.c and .tlb files.
    tlb: Name passed to midl.exe's /tlb; also the file patched in outdir.
    h: Name passed to midl.exe's /h; also the file patched in outdir.
    dlldata: Name passed to midl.exe's /dlldata.
    iid: Name passed to midl.exe's /iid; also the file patched in outdir.
    proxy: Name passed to midl.exe's /proxy.
    idl: The .idl input file.
    *flags: Additional flags passed to midl.exe before the output options.

  Returns:
    0 on success (always, when not running on Windows), or midl.exe's exit
    code if it is non-zero.  If midl.exe's outputs differ from the copied
    files, sys.exit(1) is called instead of returning.
  """
  # Copy checked-in outputs to final location.
  source = gendir
  idl_specific_dir = os.path.join(source, os.path.basename(idl))
  if os.path.isdir(idl_specific_dir):
    source = idl_specific_dir
  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
  source = os.path.normpath(source)
  distutils.dir_util.copy_tree(source, outdir, preserve_times=False)
  if dynamic_guid != 'none':
    overwrite_cls_guid(os.path.join(outdir, h),
                       os.path.join(outdir, iid),
                       os.path.join(outdir, tlb),
                       uuid.UUID(dynamic_guid))

  # On non-Windows, that's all we can do.
  if sys.platform != 'win32':
    return 0

  # On Windows, run midl.exe on the input and check that its outputs are
  # identical to the checked-in outputs (after possibly replacing their main
  # class guid).

  # Read the environment block from the file. This is stored in the format used
  # by CreateProcess. Drop last 2 NULs, one for list terminator, one for
  # trailing vs. separator.
  # This happens before the temp dir is created so that a missing or
  # malformed environment file cannot leave a stray temp dir behind.
  with open(arch) as env_file:
    env_pairs = env_file.read()[:-2].split('\0')
  env_dict = dict(item.split('=', 1) for item in env_pairs)

  tmp_dir = tempfile.mkdtemp()
  delete_tmp_dir = True
  try:
    args = ['midl', '/nologo'] + list(flags) + [
        '/out', tmp_dir,
        '/tlb', tlb,
        '/h', h,
        '/dlldata', dlldata,
        '/iid', iid,
        '/proxy', proxy,
        idl]
    popen = subprocess.Popen(args, shell=True, env=env_dict,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out, _ = popen.communicate()
    # Filter junk out of stdout, and write filtered versions. Output we want
    # to filter is pairs of lines that look like this:
    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    # objidl.idl
    lines = out.decode('utf-8').splitlines()
    prefixes = ('Processing ', '64 bit Processing ')
    processing = set(os.path.basename(x)
                     for x in lines if x.startswith(prefixes))
    for line in lines:
      if not line.startswith(prefixes) and line not in processing:
        print(line)
    if popen.returncode != 0:
      return popen.returncode

    for f in os.listdir(tmp_dir):
      ZapTimestamp(os.path.join(tmp_dir, f))

    # Now compare the output in tmp_dir to the copied-over outputs.
    diff = filecmp.dircmp(tmp_dir, outdir)
    if diff.diff_files:
      print('midl.exe output different from files in %s, see %s' % (outdir,
                                                                    tmp_dir))
      for f in diff.diff_files:
        # .tlb files are binary; a textual diff of them is not printed.
        if f.endswith('.tlb'): continue
        fromfile = os.path.join(outdir, f)
        tofile = os.path.join(tmp_dir, f)
        with io.open(fromfile) as from_f:
          from_lines = from_f.readlines()
        with io.open(tofile) as to_f:
          to_lines = to_f.readlines()
        print(''.join(
            difflib.unified_diff(from_lines, to_lines, fromfile, tofile)))
      # Keep midl.exe's outputs around so they can be inspected or used to
      # rebaseline.
      delete_tmp_dir = False
      print('To rebaseline:')
      print(r'  copy /y %s\* %s' % (tmp_dir, source))
      sys.exit(1)
    return 0
  finally:
    if os.path.exists(tmp_dir) and delete_tmp_dir:
      shutil.rmtree(tmp_dir)


# Script entry point: the command-line arguments (without the program name)
# become main()'s positional arguments, and main()'s result becomes the
# process exit status.
if __name__ == '__main__':
  exit_status = main(*sys.argv[1:])
  sys.exit(exit_status)