E.3.25. 针对标量值的运算

仅当标量参数为常数,或者是某个 vget_lane 内在函数的调用结果时,才能保证以下内在函数生成高效的代码。

向量与标量进行的乘加




int16x4_t   vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLA.I16 d0, d0, d0[0]

int32x2_t   vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLA.I32 d0, d0, d0[0]

uint16x4_t  vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLA.I16 d0, d0, d0[0]

uint32x2_t  vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLA.I32 d0, d0, d0[0]

float32x2_t vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l);

                                                // VMLA.F32 d0, d0, d0[0]

int16x8_t   vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLA.I16 q0, q0, d0[0]

int32x4_t   vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLA.I32 q0, q0, d0[0]

uint16x8_t  vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLA.I16 q0, q0, d0[0]

uint32x4_t  vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLA.I32 q0, q0, d0[0]

float32x4_t vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l);

                                                // VMLA.F32 q0, q0, d0[0]

向量与标量进行的扩大乘加


int32x4_t   vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLAL.S16 q0, d0, d0[0]

int64x2_t   vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLAL.S32 q0, d0, d0[0]

uint32x4_t  vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLAL.U16 q0, d0, d0[0]

uint64x2_t  vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLAL.U32 q0, d0, d0[0]

向量与标量进行的扩大饱和加倍乘加


int32x4_t   vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VQDMLAL.S16 q0, d0, d0[0]

int64x2_t   vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VQDMLAL.S32 q0, d0, d0[0]

向量与标量进行的乘减


int16x4_t   vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLS.I16 d0, d0, d0[0]

int32x2_t   vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLS.I32 d0, d0, d0[0]

uint16x4_t  vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLS.I16 d0, d0, d0[0]

uint32x2_t  vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLS.I32 d0, d0, d0[0]

float32x2_t vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v, __constrange(0,1) int l);

                                                // VMLS.F32 d0, d0, d0[0]

int16x8_t   vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLS.I16 q0, q0, d0[0]

int32x4_t   vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLS.I32 q0, q0, d0[0]

uint16x8_t  vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLS.I16 q0, q0, d0[0]

uint32x4_t  vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLS.I32 q0, q0, d0[0]

float32x4_t vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v, __constrange(0,1) int l);

                                                // VMLS.F32 q0, q0, d0[0]

向量与标量进行的扩大乘减


int32x4_t   vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VMLSL.S16 q0, d0, d0[0]

int64x2_t   vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VMLSL.S32 q0, d0, d0[0]

uint32x4_t  vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v, __constrange(0,3) int l);

                                                // VMLSL.U16 q0, d0, d0[0]

uint64x2_t  vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v, __constrange(0,1) int l);

                                                // VMLSL.U32 q0, d0, d0[0]

向量与标量进行的扩大饱和加倍乘减


int32x4_t   vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v, __constrange(0,3) int l);

                                                // VQDMLSL.S16 q0, d0, d0[0]

int64x2_t   vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v, __constrange(0,1) int l);

                                                // VQDMLSL.S32 q0, d0, d0[0]

向量乘以标量


int16x4_t   vmul_n_s16(int16x4_t a, int16_t b);      // VMUL.I16 d0,d0,d0[0]

int32x2_t   vmul_n_s32(int32x2_t a, int32_t b);      // VMUL.I32 d0,d0,d0[0]

float32x2_t vmul_n_f32(float32x2_t a, float32_t b);  // VMUL.F32 d0,d0,d0[0]

uint16x4_t  vmul_n_u16(uint16x4_t a, uint16_t b);    // VMUL.I16 d0,d0,d0[0]

uint32x2_t  vmul_n_u32(uint32x2_t a, uint32_t b);    // VMUL.I32 d0,d0,d0[0]

int16x8_t   vmulq_n_s16(int16x8_t a, int16_t b);     // VMUL.I16 q0,q0,d0[0]

int32x4_t   vmulq_n_s32(int32x4_t a, int32_t b);     // VMUL.I32 q0,q0,d0[0]

float32x4_t vmulq_n_f32(float32x4_t a, float32_t b); // VMUL.F32 q0,q0,d0[0]

uint16x8_t  vmulq_n_u16(uint16x8_t a, uint16_t b);   // VMUL.I16 q0,q0,d0[0]

uint32x4_t  vmulq_n_u32(uint32x4_t a, uint32_t b);   // VMUL.I32 q0,q0,d0[0]

向量与标量进行的长型乘法


int32x4_t vmull_n_s16(int16x4_t vec1, int16_t val2);    // VMULL.S16 q0,d0,d0[0]


int64x2_t vmull_n_s32(int32x2_t vec1, int32_t val2);    // VMULL.S32 q0,d0,d0[0]


uint32x4_t vmull_n_u16(uint16x4_t vec1, uint16_t val2); // VMULL.U16 q0,d0,d0[0]


uint64x2_t vmull_n_u32(uint32x2_t vec1, uint32_t val2); // VMULL.U32 q0,d0,d0[0]

向量与标量进行的长型乘法(标量取自向量的指定通道)


int32x4_t vmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                                          // VMULL.S16 q0,d0,d0[0]

int64x2_t vmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                                          // VMULL.S32 q0,d0,d0[0]

uint32x4_t vmull_lane_u16(uint16x4_t vec1, uint16x4_t val2, __constrange(0, 3) int val3);

                                                          // VMULL.U16 q0,d0,d0[0]

uint64x2_t vmull_lane_u32(uint32x2_t vec1, uint32x2_t val2, __constrange(0, 1) int val3);

                                                          // VMULL.U32 q0,d0,d0[0]

向量与标量进行的饱和加倍长型乘法


int32x4_t vqdmull_n_s16(int16x4_t vec1, int16_t val2);    // VQDMULL.S16 q0,d0,d0[0]


int64x2_t vqdmull_n_s32(int32x2_t vec1, int32_t val2);    // VQDMULL.S32 q0,d0,d0[0]

向量与标量进行的饱和加倍长型乘法(标量取自向量的指定通道)


int32x4_t vqdmull_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                                // VQDMULL.S16 q0,d0,d0[0]


int64x2_t vqdmull_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                                // VQDMULL.S32 q0,d0,d0[0]

向量与标量进行的高位饱和加倍乘法


int16x4_t vqdmulh_n_s16(int16x4_t vec1, int16_t val2);     // VQDMULH.S16 d0,d0,d0[0]


int32x2_t vqdmulh_n_s32(int32x2_t vec1, int32_t val2);     // VQDMULH.S32 d0,d0,d0[0]


int16x8_t vqdmulhq_n_s16(int16x8_t vec1, int16_t val2);    // VQDMULH.S16 q0,q0,d0[0]


int32x4_t vqdmulhq_n_s32(int32x4_t vec1, int32_t val2);    // VQDMULH.S32 q0,q0,d0[0]

向量与标量进行的高位饱和加倍乘法(标量取自向量的指定通道)


int16x4_t vqdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                                // VQDMULH.S16 d0,d0,d0[0]


int32x2_t vqdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                                // VQDMULH.S32 d0,d0,d0[0]


int16x8_t vqdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                                // VQDMULH.S16 q0,q0,d0[0]


int32x4_t vqdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                               // VQDMULH.S32 q0,q0,d0[0]

向量与标量进行的高位饱和舍入加倍乘法


int16x4_t vqrdmulh_n_s16(int16x4_t vec1, int16_t val2);     // VQRDMULH.S16 d0,d0,d0[0]


int32x2_t vqrdmulh_n_s32(int32x2_t vec1, int32_t val2);     // VQRDMULH.S32 d0,d0,d0[0]


int16x8_t vqrdmulhq_n_s16(int16x8_t vec1, int16_t val2);    // VQRDMULH.S16 q0,q0,d0[0]


int32x4_t vqrdmulhq_n_s32(int32x4_t vec1, int32_t val2);    // VQRDMULH.S32 q0,q0,d0[0]

向量与标量进行的高位饱和舍入加倍乘法(标量取自向量的指定通道)


int16x4_t vqrdmulh_lane_s16(int16x4_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                               // VQRDMULH.S16 d0,d0,d0[0]


int32x2_t vqrdmulh_lane_s32(int32x2_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                               // VQRDMULH.S32 d0,d0,d0[0]


int16x8_t vqrdmulhq_lane_s16(int16x8_t vec1, int16x4_t val2, __constrange(0, 3) int val3);

                                               // VQRDMULH.S16 q0,q0,d0[0]


int32x4_t vqrdmulhq_lane_s32(int32x4_t vec1, int32x2_t val2, __constrange(0, 1) int val3);

                                               // VQRDMULH.S32 q0,q0,d0[0]

向量与标量进行的乘加


int16x4_t   vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c);           // VMLA.I16 d0, d0, d0[0]  

int32x2_t   vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c);           // VMLA.I32 d0, d0, d0[0]  

uint16x4_t  vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c);        // VMLA.I16 d0, d0, d0[0]  

uint32x2_t  vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c);        // VMLA.I32 d0, d0, d0[0]  

float32x2_t vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c);     // VMLA.F32 d0, d0, d0[0]  

int16x8_t   vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c);          // VMLA.I16 q0, q0, d0[0]  

int32x4_t   vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c);          // VMLA.I32 q0, q0, d0[0]  

uint16x8_t  vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c);       // VMLA.I16 q0, q0, d0[0]  

uint32x4_t  vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c);       // VMLA.I32 q0, q0, d0[0]  

float32x4_t vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c);    // VMLA.F32 q0, q0, d0[0]  

向量与标量进行的扩大乘加


int32x4_t   vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c);       // VMLAL.S16 q0, d0, d0[0] 

int64x2_t   vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c);       // VMLAL.S32 q0, d0, d0[0] 

uint32x4_t  vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c);    // VMLAL.U16 q0, d0, d0[0] 

uint64x2_t  vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c);    // VMLAL.U32 q0, d0, d0[0] 

向量与标量进行的扩大饱和加倍乘加


int32x4_t   vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c);     // VQDMLAL.S16 q0, d0, d0[0]  

int64x2_t   vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c);     // VQDMLAL.S32 q0, d0, d0[0]  

向量与标量进行的乘减


int16x4_t   vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c);        // VMLS.I16 d0, d0, d0[0]  

int32x2_t   vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c);        // VMLS.I32 d0, d0, d0[0]  

uint16x4_t  vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c);     // VMLS.I16 d0, d0, d0[0]  

uint32x2_t  vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c);     // VMLS.I32 d0, d0, d0[0]  

float32x2_t vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c);  // VMLS.F32 d0, d0, d0[0]  

int16x8_t   vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c);       // VMLS.I16 q0, q0, d0[0]  

int32x4_t   vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c);       // VMLS.I32 q0, q0, d0[0]  

uint16x8_t  vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c);    // VMLS.I16 q0, q0, d0[0]  

uint32x4_t  vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c);    // VMLS.I32 q0, q0, d0[0]  

float32x4_t vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c); // VMLS.F32 q0, q0, d0[0]  

向量与标量进行的扩大乘减


int32x4_t   vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c);       // VMLSL.S16 q0, d0, d0[0] 

int64x2_t   vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c);       // VMLSL.S32 q0, d0, d0[0] 

uint32x4_t  vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c);    // VMLSL.U16 q0, d0, d0[0] 

uint64x2_t  vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c);    // VMLSL.U32 q0, d0, d0[0] 

向量与标量进行的扩大饱和加倍乘减


int32x4_t   vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c);   // VQDMLSL.S16 q0, d0, d0[0]  

int64x2_t   vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c);   // VQDMLSL.S32 q0, d0, d0[0]  

Copyright © 2007 ARM Limited. All rights reserved. ARM DUI 0348AC
Non-Confidential