|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
GCC at least is generating horrible code for them, so do the bit twiddling
ourselves. This is not premature optimization, because it might be the seed
for a QBitField class, as suggested by Andrei Alexandrescu on GN2013.
The assembler code of default and copy ctors is unchanged between the old
and new versions. Accessors such as redSize() are virtually identical (with
an 'and' and a 'mov' exchanging places). The interesting thing is what happened in
operator== and the set-all-fields ctor. These are the disassemblies of
functions wrapping op== and the ctor, respectively:
bool pf_comparison(QPixelFormat, QPixelFormat):
.cfi_startproc | .cfi_startproc
movl %esi, %edx | cmpq %rsi, %rdi
xorl %eax, %eax | sete %al
xorl %edi, %edx | ret
andl $15, %edx | .cfi_endproc
jne .L53 |
movl %esi, %edx |
xorl %edi, %edx |
testw $1008, %dx |
jne .L53 |
movl %esi, %edx |
pushq %rbx |
.cfi_def_cfa_offset 16 |
.cfi_offset 3, -16 |
movl %edi, %ebx |
movzbl %dh, %ecx |
movzbl %bh, %edx |
xorl %ecx, %edx |
andl $252, %edx |
jne .L40 |
movq %rdi, %rcx |
movq %rsi, %rdx |
shrq $16, %rcx |
shrq $16, %rdx |
movl %ecx, %ebx |
xorl %edx, %ebx |
movl %ebx, %r8d |
andl $63, %r8d |
jne .L40 |
xorl %ecx, %edx |
testw $4032, %dx |
jne .L40 |
movq %rsi, %rcx |
movabsq $16911433728, %rdx |
xorq %rdi, %rcx |
testq %rdx, %rcx |
jne .L40 |
movq %rdi, %rdx |
movq %rsi, %rcx |
shrq $32, %rdx |
shrq $32, %rcx |
movl %edx, %ebx |
xorl %ecx, %ebx |
movl %ebx, %r8d |
andl $252, %r8d |
jne .L40 |
movq %rsi, %r9 |
movq %rdi, %r8 |
shrq $40, %r9 |
shrq $40, %r8 |
xorl %r9d, %r8d |
andl $127, %r8d |
jne .L40 |
xorl %ecx, %edx |
andl $98304, %edx |
jne .L40 |
movq %rsi, %rcx |
movq %rdi, %rdx |
shrq $48, %rcx |
shrq $48, %rdx |
xorl %ecx, %edx |
andl $126, %edx |
jne .L40 |
shrq $48, %rdi |
movq %rcx, %rax |
xorl %edi, %eax |
testw $-128, %ax |
sete %al |
.p2align 4,,10 |
.p2align 3 |
.L40: |
popq %rbx |
.cfi_restore 3 |
.cfi_def_cfa_offset 8 |
.L53: |
rep |
ret |
.cfi_endproc |
That one is pretty obvious. Hint: the right one is the new version.
QPixelFormat pf_unwieldy_ctor(QPixelFormat::ColorModel, ...)
.cfi_startproc | .cfi_startproc
movq %rbp, -32(%rsp) | movq %rbx, -40(%rsp)
movq %r12, -24(%rsp) | .cfi_offset 3, -48
andl $15, %edi | movzbl 8(%rsp), %ebx
movq %r13, -16(%rsp) | andl $63, %esi
movq %r14, -8(%rsp) | movq %rbp, -32(%rsp)
andl $63, %esi | movq %r12, -24(%rsp)
movl 48(%rsp), %r11d | andl $63, %edx
movzbl 8(%rsp), %r10d | .cfi_offset 6, -40
andl $63, %edx | .cfi_offset 12, -32
.cfi_offset 6, -40 | movl 16(%rsp), %ebp
.cfi_offset 12, -32 | movl 32(%rsp), %r11d
.cfi_offset 13, -24 | andl $63, %ecx
.cfi_offset 14, -16 | movl 40(%rsp), %r10d
movzbl 16(%rsp), %r14d | movq %r13, -16(%rsp)
movzbl 24(%rsp), %r13d | andl $63, %r8d
andl $63, %ecx | andl $63, %ebx
movzbl 32(%rsp), %r12d | .cfi_offset 13, -24
movzbl 40(%rsp), %ebp | movl 48(%rsp), %r13d
andl $63, %r8d | andl $63, %r9d
movl %r11d, %eax | movq %rbx, %r12
movq %rbx, -40(%rsp) | movl 24(%rsp), %ebx
.cfi_offset 3, -48 | andl $1, %ebp
andl $63, %r9d | andl $1, %r11d
andl $3, %eax | andl $15, %r10d
andl $63, %r10d | movq %rdi, %rax
xorl %ebx, %ebx | andl $15, %eax
andl $1, %r14d | salq $4, %rsi
andl $1, %r13d | salq $10, %rdx
andl $1, %r12d | andl $1, %ebx
andl $15, %ebp | salq $16, %rcx
cmpl $2, %r11d | salq $22, %r8
cmovne %eax, %ebx | salq $28, %r9
andl $63, %esi | salq $34, %r12
movq %rdi, %rax | salq $40, %rbp
salq $4, %rsi | salq $41, %rbx
andl $15, %eax | salq $42, %r11
andl $63, %edx | salq $43, %r10
salq $10, %rdx | cmpl $2, %r13d
orq %rsi, %rax | movq %r14, -8(%rsp)
andl $63, %ecx | .cfi_offset 14, -16
salq $16, %rcx | movzbl 56(%rsp), %r14d
orq %rdx, %rax | je .L45
andl $63, %r8d | andl $3, %r13d
orq %rcx, %rax | salq $47, %r13
salq $22, %r8 |.L44:
andl $63, %r9d | orq %rsi, %rax
salq $28, %r9 | orq %rdx, %rax
orq %r8, %rax | movq %r14, %rdx
andl $63, %r10d | movq -8(%rsp), %r14
movq %r14, %rcx | orq %rcx, %rax
salq $34, %r10 | andl $63, %edx
orq %r9, %rax | orq %r8, %rax
andl $1, %ecx | salq $49, %rdx
movq %r13, %rdx | orq %r9, %rax
orq %r10, %rax | orq %r12, %rax
salq $40, %rcx | movq -24(%rsp), %r12
andl $1, %edx | orq %rbp, %rax
movq -16(%rsp), %r13 | movq -32(%rsp), %rbp
salq $41, %rdx | orq %rbx, %rax
orq %rcx, %rax | movq -40(%rsp), %rbx
movq %r12, %rcx | orq %r11, %rax
orq %rdx, %rax | orq %r10, %rax
andl $1, %ecx | orq %rdx, %rax
movq %rbp, %rdx | orq %r13, %rax
salq $42, %rcx | movq -16(%rsp), %r13
andl $15, %edx | ret
movq -32(%rsp), %rbp | .p2align 4,,10
salq $43, %rdx | .p2align 3
orq %rcx, %rax |.L45:
movq %rbx, %rcx | xorl %r13d, %r13d
orq %rdx, %rax | jmp .L44
movzbl 56(%rsp), %edx | .cfi_endproc
andl $3, %ecx |
salq $47, %rcx |
movq -40(%rsp), %rbx |
movq -24(%rsp), %r12 |
movq -8(%rsp), %r14 |
andl $63, %edx |
orq %rcx, %rax |
salq $49, %rdx |
orq %rdx, %rax |
ret |
.cfi_endproc |
Without bothering to understand the details, they look pretty similar, with the new
version being slightly shorter. But that may not mean anything.
Change-Id: I31e84c9109ccd0c7282351b2e2802407a9b360b4
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
|