Schmide
Diamond Member
- Mar 7, 2002
- 5,690
- 926
- 126
I had some free time so I decided to play with icc 13.
First a handy site: http://gcc.godbolt.org/. This allows you to enter code and see the resulting assembly after compilation with gcc, icc, clang and gcc-arm.
Nice tool.
Halving the loop count and manually unrolling the body to set two bits per iteration breaks the optimization.
Code:
void r(unsigned *bitmap) // Sets bits 0..31 of bitmap (all of bitmap[0]), two bits per loop iteration.
{
unsigned baddr = 0; // index of the next bit to set
unsigned nb = 32; // total number of bits to set
unsigned overun = nb & 1; // 1 when nb is odd, i.e. one bit is left over after the pairs
nb>>=1; // number of two-bit iterations (16 here)
while (nb--) {
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // word index = baddr / 32, bit position = baddr % 32
baddr++;
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // second bit of the pair
baddr++;
}
if(overun) // never true here because nb = 32 is even
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // leftover bit for an odd nb
}
-O3 -m32
Code:
# icc -O3 -m32 output for the hand-unrolled r(unsigned*), AT&T syntax (op src, dst).
# The loop still sets one bit per orl (two per iteration); no whole-word stores are generated.
# Register roles inside the loop:
#   %eax = baddr (advances by 2 per iteration)
#   %edx = iteration counter (starts at 15, loop exits when it reaches -1)
#   %esi = bitmap base pointer
#   %ebx = word index (bit index >> 5)
#   %ecx = shift count (baddr, then baddr + 1)
#   %edi = single-bit mask
L__routine_start__Z1rPj_0:
r(unsigned int*):
# Save the registers overwritten below; the matching popl sequence before ret restores them.
pushl %esi #4.1
pushl %edi #4.1
pushl %ebx #4.1
xorl %eax, %eax #
# 16(%esp) is the bitmap argument: three 4-byte pushes plus the 4-byte return address.
movl 16(%esp), %ecx #3.6
movl $15, %edx #
movl %ecx, %esi #
..B1.2: # Preds ..B1.2 ..B1.1
# First bit of the pair: word index = baddr >> 5, mask = 1 << baddr.
# No explicit "& 0x1f" is emitted; a 32-bit shll by %cl only uses the low 5 bits of %cl.
movl %eax, %ebx #11.21
movl %eax, %ecx #11.41
shrl $5, %ebx #11.21
movl $1, %edi #11.41
shll %cl, %edi #11.41
# %ecx = baddr + 1 for the second bit; the counter and baddr are updated before the first store.
lea 1(%eax), %ecx #14.5
decl %edx #10.10
addl $2, %eax #14.5
orl %edi, (%esi,%ebx,4) #11.5
# Second bit of the pair: same sequence using baddr + 1.
movl %ecx, %ebx #13.21
shrl $5, %ebx #13.21
movl $1, %edi #13.41
shll %cl, %edi #13.41
orl %edi, (%esi,%ebx,4) #13.5
# Counter runs 15, 14, ..., 0, -1: 16 iterations, 32 bits in total.
cmpl $-1, %edx #10.10
jne ..B1.2 # Prob 82% #10.10
# No instructions for the trailing if(overun) appear; execution falls straight into the epilogue.
popl %ebx #18.1
popl %edi #18.1
popl %esi #18.1
ret #18.1
Edit: This one is funny.
Count halved, followed by two consecutive loops that are identical except for their counters, plus an overrun step if the count is odd.
Code:
// Type your code here, or load an example.
void r(unsigned *bitmap) // Sets bits 0..31 of bitmap using two back-to-back one-bit-per-iteration loops.
{
unsigned baddr = 0; // index of the next bit to set
unsigned nb = 32; // total number of bits to set
unsigned overun = nb & 1; // 1 when nb is odd, i.e. one bit is left over after both loops
nb>>=1; // each loop handles half of the bits (16 here)
unsigned nb2=nb; // separate counter for the second loop
while (nb--) { // first half: bits 0..15
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // word index = baddr / 32, bit position = baddr % 32
baddr++;
}
while (nb2--) { // second half: bits 16..31, continuing from the same baddr
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
baddr++;
}
if(overun) // never true here because nb = 32 is even
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // leftover bit for an odd nb
}
-O3 -m32
Code:
# icc -O3 -m32 output for the two-loop version of r(unsigned*), AT&T syntax (op src, dst).
# The first source loop stays a bit-at-a-time loop (..B1.2). The second source loop is
# replaced by mask-based code that treats the bits as one contiguous range and writes
# whole 32-bit words (..L12 / ..L13 / ..L14).
L__routine_start__Z1rPj_0:
r(unsigned int*):
pushl %esi #4.1
pushl %edi #4.1
# This second push of %esi only reserves a 4-byte stack slot: %ebp is stored into it below,
# reloaded after the first loop, and the slot is discarded by "popl %ecx" in the epilogue.
pushl %esi #4.1
# Set-up: %ecx = baddr = 0, %esi = loop counter (15), %edi = copy of 15 used later to
# derive the second loop's bit count, %eax = bitmap pointer (16(%esp): three pushes plus
# the return address), %ebp saved because the loop uses it as the mask register.
xorl %edx, %edx #
movl $15, %esi #
movl %edx, %ecx #
movl 16(%esp), %eax #3.6
movl %esi, %edi #
movl %ebp, (%esp) #
..B1.2: # Preds ..B1.2 ..B1.1
# First source loop: set bit %ecx, one bit per iteration.
# Counter runs 15 down to -1, i.e. 16 iterations.
movl %ecx, %edx #11.21
movl $1, %ebp #11.41
shrl $5, %edx #11.21
decl %esi #10.10
shll %cl, %ebp #11.41
incl %ecx #12.5
orl %ebp, (%eax,%edx,4) #11.5
cmpl $-1, %esi #10.10
jne ..B1.2 # Prob 82% #10.10
# Set-up for the second source loop, handled as a single bit range. At the compare below:
#   %edx = start bit within its word (baddr & 31)
#   %eax = address of the first word touched (bitmap + 4 * (baddr >> 5))
#   %esi = number of bits to set (%edi + 1 = 16)
#   %ecx = start offset + number of bits
# %ebp is restored from the stack slot here as well.
movl %ecx, %edx #
lea 1(%edi), %ecx #14.10
movl (%esp), %ebp #
movl %edx, %esi #14.10
andl $31, %edx #14.10
shrl $5, %esi #14.10
lea (%eax,%esi,4), %eax #14.10
movl %ecx, %esi #14.10
addl %edx, %ecx #14.10
# If offset + count <= 32 the whole range lies inside one word: handled at ..L12.
# With the values set up in this listing (start bit 16, count 16) the sum is exactly 32.
cmpl $32, %ecx #14.10
jbe ..L12 # Prob 50% #14.10
# Multi-word range, first word: OR in every bit from the start offset upward (-1 << offset).
movl %ecx, %esi #14.10
movl %edx, %ecx #14.10
movl $-1, %edi #14.10
shll %cl, %edi #14.10
orl %edi, (%eax) #14.10
# %esi = bits still to set after the first word; advance to the next word.
subl $32, %esi #14.10
addl $4, %eax #14.10
movl $-1, %edi #14.10
cmpl $32, %esi #14.10
jbe ..L13 # Prob 50% #14.10
..L14: #
# Middle words: every bit of the word is in the range, so all-ones is written with a plain
# movl instead of being OR-ed in. Repeats while more than 32 bits remain (unsigned compare).
movl %edi, (%eax) #14.10
addl $4, %eax #14.10
subl $32, %esi #14.10
cmpl $32, %esi #14.10
ja ..L14 # Prob 50% #14.10
..L13: #
# Last word: OR in the low %esi bits, mask = -1 >> (32 - %esi).
movl $32, %ecx #14.10
subl %esi, %ecx #14.10
shrl %cl, %edi #14.10
orl %edi, (%eax) #14.10
jmp ..L15 # Prob 100% #14.10
..L12: #
# Single-word range: mask = (-1 >> (32 - count)) << offset, OR-ed into the word.
movl $-1, %edi #14.10
movl $32, %ecx #14.10
subl %esi, %ecx #14.10
shrl %cl, %edi #14.10
movl %edx, %ecx #14.10
shll %cl, %edi #14.10
orl %edi, (%eax) #14.10
..L15: #
# Epilogue: "popl %ecx" drops the reserved stack slot, then %edi and %esi are restored.
popl %ecx #20.1
popl %edi #20.1
popl %esi #20.1
ret #20.1
The compiler skips the optimization for the first loop but applies it to the second.
Changing nb to an odd number breaks the optimization for the second loop as well!
Code:
void r(unsigned *bitmap) // Sets bits 0..64 of bitmap (bitmap[0], bitmap[1] and bit 0 of bitmap[2]) with an odd bit count.
{
unsigned baddr = 0; // index of the next bit to set
unsigned nb = 65; // total number of bits to set (odd)
unsigned overun = nb & 1; // 1 here: one bit is left over after both loops
nb>>=1; // each loop handles 32 bits
unsigned nb2=nb; // separate counter for the second loop
while (nb--) { // bits 0..31
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // word index = baddr / 32, bit position = baddr % 32
baddr++;
}
while (nb2--) { // bits 32..63, continuing from the same baddr
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
baddr++;
}
if(overun) // true here because nb = 65 is odd
bitmap[baddr >> 5] |= 1 << (baddr & 0x1f); // leftover bit 64, i.e. bit 0 of bitmap[2]
}
-O3 -m32
Code:
# icc -O3 -m32 output for the nb = 65 version, AT&T syntax (op src, dst).
# The function label lines are not part of this paste.
# Both source loops remain bit-at-a-time loops here (..B1.2 and ..B1.4), followed by one
# extra single-bit OR for the overun case; no whole-word mask code is generated.
pushl %esi #4.1
pushl %edi #4.1
# The slot created by this second push of %esi is never accessed below;
# "popl %ecx" in the epilogue simply removes it.
pushl %esi #4.1
# %ecx = baddr = 0, %edx = bitmap pointer (16(%esp): three pushes plus the return address),
# %eax = loop counter (31 down to -1, i.e. 32 iterations).
xorl %ecx, %ecx #
movl 16(%esp), %edx #
movl $31, %eax #
..B1.2: # Preds ..B1.2 ..B1.1
# First source loop: word index = %ecx >> 5 in %esi, mask = 1 << %cl in %edi.
# A 32-bit shll by %cl only uses the low 5 bits of %cl, so no explicit "& 0x1f" is emitted.
movl %ecx, %esi #11.21
movl $1, %edi #11.41
shrl $5, %esi #11.21
decl %eax #10.10
shll %cl, %edi #11.41
incl %ecx #12.5
orl %edi, (%edx,%esi,4) #11.5
cmpl $-1, %eax #10.10
jne ..B1.2 # Prob 82% #10.10
# Second source loop: the bitmap pointer is reloaded into %edi and the counter reset to 31;
# %ecx (baddr) carries over from the first loop.
movl 16(%esp), %edi #14.10
movl $31, %eax #14.10
..B1.4: # Preds ..B1.4 ..B1.3
# Same per-bit sequence as ..B1.2, with %edx as word index and %esi as mask.
movl %ecx, %edx #15.21
movl $1, %esi #15.41
shrl $5, %edx #15.21
decl %eax #14.10
shll %cl, %esi #15.41
incl %ecx #16.5
orl %esi, (%edi,%edx,4) #15.5
cmpl $-1, %eax #14.10
jne ..B1.4 # Prob 82% #14.10
# Body of the trailing if(overun), emitted with no branch in front of it: set bit %ecx.
# %ecx is reused for the bitmap pointer only after the shift has consumed %cl.
movl %ecx, %eax #19.21
movl $1, %edx #19.41
shrl $5, %eax #19.21
shll %cl, %edx #19.41
movl 16(%esp), %ecx #19.5
orl %edx, (%ecx,%eax,4) #19.5
# Epilogue: drop the unused slot, then restore %edi and %esi.
popl %ecx #20.1
popl %edi #20.1
popl %esi #20.1
ret #20.1
The optimization is skipped because of the overrun.
Last edited: