Few months ago I read an interesting post by Mike Haertel the original author of GNU grep titled “why GNU grep is fast“, one of the reason given is loop unrolling.
Few weeks ago I came a cross the same post which made me more interested on Loop Unrolling, so I decided to experiment with it, I implemented simple function to search for an item inside a list of items, and return as soon as it finds it, soo here is my result:
|
First |
Middle |
Not in Array |
| Normal Loop |
0.387 |
1.495 |
2.605 |
| Unrolled Loop |
0.397 |
1.07 |
1.701 |
| Improvement |
-2.52 |
39.72 |
53.15 |
|
|
|
|
| O2 Normal Loop |
0.045 |
0.279 |
0.373 |
| O2 Unrolled Loop |
0.045 |
0.217 |
0.285 |
| Improvement |
0.00 |
28.57 |
30.88 |
The result is taken using the “time” command, “O2″ means compiling the program with optimization level 2.
#include <stdio.h>
inline int item_match_fast(int item, int * items, int items_size)
{
int i;
int repeat, left;
repeat = items_size / 8;
left = items_size % 8;
i = 0;
while (repeat-- > 0) {
if (item==items[i]) { return 1; }
if (item==items[i+1]) { return 1; }
if (item==items[i+2]) { return 1; }
if (item==items[i+3]) { return 1; }
if (item==items[i+4]) { return 1; }
if (item==items[i+5]) { return 1; }
if (item==items[i+6]) { return 1; }
if (item==items[i+7]) { return 1; }
i += 8;
}
switch(left) {
case 7: if (item==items[i+7]) { return 1; }
case 6: if (item==items[i+6]) { return 1; }
case 5: if (item==items[i+5]) { return 1; }
case 4: if (item==items[i+4]) { return 1; }
case 3: if (item==items[i+3]) { return 1; }
case 2: if (item==items[i+2]) { return 1; }
case 1: if (item==items[i+1]) { return 1; }
case 0: ; /* don nothing */
}
return 0;
}
inline int item_match(int item, int * items, int items_size)
{
int i;
/* check items array */
for (i=0; i<items_size ; i++) {
if(item==items[i]) {
return 1;
}
}
return 0;
}
#define REPEAT 10000000
#define ITEM 7070
int main(int argc, char ** argv)
{
int i;
int items[] = { 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,
1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020 };
int items_size = 20;
if (argc==2) {
printf("Loop unrolled\n");
for (i=0;i<REPEAT; i++) {
item_match_fast(ITEM, items, items_size);
}
} else {
printf("Normal Loop\n");
for (i=0;i<REPEAT; i++) {
item_match(ITEM, items, items_size);
}
}
return 0;
}