3.18 Vectorizable loop iteration counts

If a loop has a fixed iteration count, automatic vectorization is possible. The iteration count must be known at the start of the loop.

In the example vectorizable loop below, the iteration count is n. The value of n does not change throughout the course of the loop, so this loop can be automatically vectorized.

If a loop does not have a fixed iteration count, automatic vectorization is not possible.

In the example nonvectorizable loop below, i is advanced by a data-dependent amount (a[i]) on each iteration, so the number of iterations cannot be determined at the start of the loop and this loop cannot be automatically vectorized.

Table 3-4 Vectorizable and nonvectorizable loops

Vectorizable loop Nonvectorizable loop
/* myprog1.c */
/* Vectorizable example: the loop bound n is not modified in the loop body
   and i advances by exactly 1 per iteration. */
int a[99], b[99], c[99], i, n;
...
for (i = 0; i < n; i++)   /* iteration count is n */
{
     a[i] = b[i] + c[i];  /* element-wise add; no write to i or n */
}
/* myprog2.c */
/* Nonvectorizable example: i is advanced by a value computed inside the
   loop, so the number of iterations is not known when the loop starts. */
int a[99], b[99], c[99], i, n;
...
while (i < n)
{
     a[i] = b[i] + c[i];
     i += a[i];           /* step is the value just stored in a[i] */
};
armcc --cpu=Cortex-A8 -O3 -Otime --vectorize myprog1.c -o-
armcc --cpu=Cortex-A8 -O3 -Otime --vectorize myprog2.c -o-
        ; ------------------------------------------------------------------
        ; Listing 1 of 2: compiler output for myprog1.c (vectorizable loop).
        ; This and the myprog2.c listing further down are two separate
        ; compiler outputs; each has its own |L1.nn| label namespace.
        ;
        ; Register use in f, as established by the instructions below:
        ;   r4      = address loaded from |L1.160|; [r4,#0] is i, [r4,#4] is n
        ;   r12     = n
        ;   r1, r2  = pointers to the two source arrays (b and c; which is
        ;             which cannot be told from this listing)
        ;   r3      = pointer to the destination array (a)
        ;   0x18c   = 396 bytes = 99 * 4, the size of one int[99] array
        ; ------------------------------------------------------------------
        ARM
        REQUIRE8
        PRESERVE8
        AREA ||.text||, CODE, READONLY, ALIGN=2
f PROC
        PUSH     {r4-r6}
        MOV      r0,#0
        LDR      r4,|L1.160|
        STR      r0,[r4,#0]         ; i = 0
        LDR      r12,[r4,#4]        ; r12 = n
        CMP      r12,#0
        BLE      |L1.152|           ; n <= 0: nothing to do
        ASR      r0,r12,#31         ; r0 = 0 or -1 (sign of n)
        LDR      r1,|L1.164|        ; r1 = first source array
        ADD      r0,r12,r0,LSR #30  ; n + (3 if n is negative, else 0)
        ADD      r2,r1,#0x18c       ; r2 = second source array
        ASRS     r0,r0,#2           ; r0 = n / 4 = number of 4-element steps
        SUB      r3,r2,#0x318       ; r3 = destination array (r1 - 0x18c)
        BEQ      |L1.80|            ; fewer than 4 elements: skip NEON loop

        ; NEON loop: four 32-bit additions per iteration.
|L1.56|
        VLD1.32  {d0,d1},[r1]!      ; q0 = 4 ints from first source, advance
        SUBS     r0,r0,#1           ; one 4-element step done
        VLD1.32  {d2,d3},[r2]!      ; q1 = 4 ints from second source, advance
        VADD.I32 q0,q0,q1
        VST1.32  {d0,d1},[r3]!      ; store 4 results, advance
        BNE      |L1.56|

        ; Remainder handling for the n & 3 elements the NEON loop left over.
|L1.80|
        AND      r0,r12,#3          ; r0 = n & 3
        CMP      r0,#0
        BLE      |L1.144|           ; no leftover elements
        SUB      r0,r12,r0          ; r0 = index of first leftover element
        CMP      r0,r12
        BGE      |L1.144|
        LDR      r1,|L1.164|        ; reload array bases (post-incremented above)
        ADD      r2,r1,#0x18c
        SUB      r3,r2,#0x318

        ; Scalar loop: one element per iteration, index in r0.
|L1.116|
        LDR      r5,[r1,r0,LSL #2]
        LDR      r6,[r2,r0,LSL #2]
        ADD      r5,r5,r6
        STR      r5,[r3,r0,LSL #2]
        ADD      r0,r0,#1
        CMP      r0,r12
        BLT      |L1.116|

|L1.144|
        LDR      r0,[r4,#4]         ; r0 = n
        STR      r0,[r4,#0]         ; i = n (final value of the loop counter)

|L1.152|
        POP      {r4-r6}
        BX       lr
        ENDP

        ; ------------------------------------------------------------------
        ; Listing 2 of 2: compiler output for myprog2.c (nonvectorizable loop).
        ; No NEON instructions are generated: the loop stays scalar.
        ;
        ; Register use in foo, as established by the instructions below:
        ;   r3       = address loaded from |L1.76|; [r3,#0] is i, [r3,#4] is n
        ;   r0       = i,  r2 = n
        ;   r12, r4  = pointers to the two source arrays (b and c; which is
        ;              which cannot be told from this listing)
        ;   r5       = pointer to the destination array (a)
        ; ------------------------------------------------------------------
        ARM
        REQUIRE8
        PRESERVE8
        AREA ||.text||, CODE, READONLY, ALIGN=2
||foo|| PROC
        LDR      r3,|L1.76|
        LDR      r0,[r3,#0]         ; r0 = i
        LDR      r2,[r3,#4]         ; r2 = n
        CMP      r0,r2
        BXGE     lr                 ; i >= n: loop body never runs
        PUSH     {r4-r6}
        LDR      r12,|L1.80|        ; r12 = first source array
        ADD      r4,r12,#0x18c      ; r4 = second source array
        SUB      r5,r12,#0x18c      ; r5 = destination array
|L1.36|
        LDR      r1,[r12,r0,LSL #2]
        LDR      r6,[r4,r0,LSL #2]
        ADD      r1,r1,r6           ; r1 = sum of the two source elements
        STR      r1,[r5,r0,LSL #2]  ; a[i] = sum
        ADD      r0,r0,r1           ; i += a[i]  (data-dependent step)
        CMP      r0,r2
        STR      r0,[r3,#0]         ; write i back on every iteration
        BLT      |L1.36|
        POP      {r4-r6}
        BX       lr
        ENDP
 
Non-Confidential. PDF version. ARM DUI0472M
Copyright © 2010-2016 ARM Limited or its affiliates. All rights reserved.