E.3.2. 곱하기

다음 내장 함수는 곱하기를 포함하는 연산을 제공합니다.

벡터 곱하기: vmul -> Vr[i] := Va[i] * Vb[i]

int8x8_t  vmul_s8(int8x8_t a, int8x8_t b);     // VMUL.I8 d0,d0,d0 
int16x4_t  vmul_s16(int16x4_t a, int16x4_t b);   // VMUL.I16 d0,d0,d0
int32x2_t  vmul_s32(int32x2_t a, int32x2_t b);   // VMUL.I32 d0,d0,d0
float32x2_t vmul_f32(float32x2_t a, float32x2_t b); // VMUL.F32 d0,d0,d0
uint8x8_t  vmul_u8(uint8x8_t a, uint8x8_t b);    // VMUL.I8 d0,d0,d0 
uint16x4_t vmul_u16(uint16x4_t a, uint16x4_t b);  // VMUL.I16 d0,d0,d0
uint32x2_t vmul_u32(uint32x2_t a, uint32x2_t b);  // VMUL.I32 d0,d0,d0
poly8x8_t  vmul_p8(poly8x8_t a, poly8x8_t b);    // VMUL.P8 d0,d0,d0 
int8x16_t  vmulq_s8(int8x16_t a, int8x16_t b);   // VMUL.I8 q0,q0,q0 
int16x8_t  vmulq_s16(int16x8_t a, int16x8_t b);   // VMUL.I16 q0,q0,q0
int32x4_t  vmulq_s32(int32x4_t a, int32x4_t b);   // VMUL.I32 q0,q0,q0
float32x4_t vmulq_f32(float32x4_t a, float32x4_t b); // VMUL.F32 q0,q0,q0
uint8x16_t vmulq_u8(uint8x16_t a, uint8x16_t b);  // VMUL.I8 q0,q0,q0 
uint16x8_t vmulq_u16(uint16x8_t a, uint16x8_t b);  // VMUL.I16 q0,q0,q0
uint32x4_t vmulq_u32(uint32x4_t a, uint32x4_t b);  // VMUL.I32 q0,q0,q0
poly8x16_t vmulq_p8(poly8x16_t a, poly8x16_t b);  // VMUL.P8 q0,q0,q0 

벡터 곱하기 누산: vmla -> Vr[i] := Va[i] + Vb[i] * Vc[i]

int8x8_t  vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c);      // VMLA.I8 d0,d0,d0 
int16x4_t  vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c);    // VMLA.I16 d0,d0,d0
int32x2_t  vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c);    // VMLA.I32 d0,d0,d0
float32x2_t vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLA.F32 d0,d0,d0
uint8x8_t  vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c);     // VMLA.I8 d0,d0,d0 
uint16x4_t vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c);   // VMLA.I16 d0,d0,d0
uint32x2_t vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c);   // VMLA.I32 d0,d0,d0
int8x16_t  vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c);    // VMLA.I8 q0,q0,q0 
int16x8_t  vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c);    // VMLA.I16 q0,q0,q0
int32x4_t  vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c);    // VMLA.I32 q0,q0,q0
float32x4_t vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLA.F32 q0,q0,q0
uint8x16_t vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c);   // VMLA.I8 q0,q0,q0 
uint16x8_t vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c);  // VMLA.I16 q0,q0,q0
uint32x4_t vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c);  // VMLA.I32 q0,q0,q0

벡터 곱하기 누산 long: vmlal -> Vr[i] := Va[i] + Vb[i] * Vc[i]

int16x8_t vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c);    // VMLAL.S8 q0,d0,d0 
int32x4_t vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c);  // VMLAL.S16 q0,d0,d0
int64x2_t vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c);  // VMLAL.S32 q0,d0,d0
uint16x8_t vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c);  // VMLAL.U8 q0,d0,d0 
uint32x4_t vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLAL.U16 q0,d0,d0
uint64x2_t vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLAL.U32 q0,d0,d0

벡터 곱하기 빼기: vmls -> Vr[i] := Va[i] - Vb[i] * Vc[i]

int8x8_t  vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c);      // VMLS.I8 d0,d0,d0 
int16x4_t  vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c);    // VMLS.I16 d0,d0,d0
int32x2_t  vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c);    // VMLS.I32 d0,d0,d0
float32x2_t vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c); // VMLS.F32 d0,d0,d0
uint8x8_t  vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c);     // VMLS.I8 d0,d0,d0 
uint16x4_t vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c);   // VMLS.I16 d0,d0,d0
uint32x2_t vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c);   // VMLS.I32 d0,d0,d0
int8x16_t  vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c);    // VMLS.I8 q0,q0,q0 
int16x8_t  vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c);    // VMLS.I16 q0,q0,q0
int32x4_t  vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c);    // VMLS.I32 q0,q0,q0
float32x4_t vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c); // VMLS.F32 q0,q0,q0
uint8x16_t vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c);   // VMLS.I8 q0,q0,q0 
uint16x8_t vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c);  // VMLS.I16 q0,q0,q0
uint32x4_t vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c);  // VMLS.I32 q0,q0,q0

벡터 곱하기 빼기 long: vmlsl -> Vr[i] := Va[i] - Vb[i] * Vc[i]

int16x8_t vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c);    // VMLSL.S8 q0,d0,d0 
int32x4_t vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c);  // VMLSL.S16 q0,d0,d0
int64x2_t vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c);  // VMLSL.S32 q0,d0,d0
uint16x8_t vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c);  // VMLSL.U8 q0,d0,d0 
uint32x4_t vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c); // VMLSL.U16 q0,d0,d0
uint64x2_t vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c); // VMLSL.U32 q0,d0,d0

벡터 상위 포화 배수화 곱하기

int16x4_t vqdmulh_s16(int16x4_t a, int16x4_t b); // VQDMULH.S16 d0,d0,d0
int32x2_t vqdmulh_s32(int32x2_t a, int32x2_t b); // VQDMULH.S32 d0,d0,d0
int16x8_t vqdmulhq_s16(int16x8_t a, int16x8_t b); // VQDMULH.S16 q0,q0,q0
int32x4_t vqdmulhq_s32(int32x4_t a, int32x4_t b); // VQDMULH.S32 q0,q0,q0

벡터 상위 포화 반올림 배수화 곱하기

int16x4_t vqrdmulh_s16(int16x4_t a, int16x4_t b); // VQRDMULH.S16 d0,d0,d0
int32x2_t vqrdmulh_s32(int32x2_t a, int32x2_t b); // VQRDMULH.S32 d0,d0,d0
int16x8_t vqrdmulhq_s16(int16x8_t a, int16x8_t b); // VQRDMULH.S16 q0,q0,q0
int32x4_t vqrdmulhq_s32(int32x4_t a, int32x4_t b); // VQRDMULH.S32 q0,q0,q0

벡터 포화 배수화 곱하기 누산 long

int32x4_t vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLAL.S16 q0,d0,d0
int64x2_t vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLAL.S32 q0,d0,d0

벡터 포화 배수화 곱하기 빼기 long

int32x4_t vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c); // VQDMLSL.S16 q0,d0,d0
int64x2_t vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c); // VQDMLSL.S32 q0,d0,d0

벡터 long 곱하기

int16x8_t vmull_s8(int8x8_t a, int8x8_t b);   // VMULL.S8 q0,d0,d0 
int32x4_t vmull_s16(int16x4_t a, int16x4_t b);  // VMULL.S16 q0,d0,d0
int64x2_t vmull_s32(int32x2_t a, int32x2_t b);  // VMULL.S32 q0,d0,d0
uint16x8_t vmull_u8(uint8x8_t a, uint8x8_t b);  // VMULL.U8 q0,d0,d0 
uint32x4_t vmull_u16(uint16x4_t a, uint16x4_t b); // VMULL.U16 q0,d0,d0
uint64x2_t vmull_u32(uint32x2_t a, uint32x2_t b); // VMULL.U32 q0,d0,d0
poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b);  // VMULL.P8 q0,d0,d0 

벡터 포화 배수화 long 곱하기

int32x4_t vqdmull_s16(int16x4_t a, int16x4_t b); // VQDMULL.S16 q0,d0,d0
int64x2_t vqdmull_s32(int32x2_t a, int32x2_t b); // VQDMULL.S32 q0,d0,d0
Copyright © 2007 ARM Limited. All rights reserved. ARM DUI 0348AK
Non-Confidential