18.17 NEON intrinsics for loading a single vector or lane

These intrinsics load a single vector, a single lane of a vector, or one value replicated to all lanes of a vector, for each supported element type.

Load a single vector from memory

uint8x16_t  vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
uint16x8_t  vld1q_u16(__transfersize(8) uint16_t const * ptr);
                                                              // VLD1.16 {d0, d1}, [r0]
uint32x4_t  vld1q_u32(__transfersize(4) uint32_t const * ptr); 
                                                              // VLD1.32 {d0, d1}, [r0]
uint64x2_t  vld1q_u64(__transfersize(2) uint64_t const * ptr);// VLD1.64 {d0, d1}, [r0]
int8x16_t   vld1q_s8(__transfersize(16) int8_t const * ptr); 
                                                              // VLD1.8 {d0, d1}, [r0]
int16x8_t   vld1q_s16(__transfersize(8) int16_t const * ptr); // VLD1.16 {d0, d1}, [r0]
int32x4_t   vld1q_s32(__transfersize(4) int32_t const * ptr); // VLD1.32 {d0, d1}, [r0]
int64x2_t   vld1q_s64(__transfersize(2) int64_t const * ptr); // VLD1.64 {d0, d1}, [r0]
float16x8_t vld1q_f16(__transfersize(8) __fp16 const * ptr);  // VLD1.16 {d0, d1}, [r0]
float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); 
                                                              // VLD1.32 {d0, d1}, [r0]
poly8x16_t  vld1q_p8(__transfersize(16) poly8_t const * ptr); // VLD1.8 {d0, d1}, [r0]
poly16x8_t  vld1q_p16(__transfersize(8) poly16_t const * ptr); 
                                                              // VLD1.16 {d0, d1}, [r0]
uint8x8_t   vld1_u8(__transfersize(8) uint8_t const * ptr);   // VLD1.8 {d0}, [r0]
uint16x4_t  vld1_u16(__transfersize(4) uint16_t const * ptr); // VLD1.16 {d0}, [r0]
uint32x2_t  vld1_u32(__transfersize(2) uint32_t const * ptr); // VLD1.32 {d0}, [r0]
uint64x1_t  vld1_u64(__transfersize(1) uint64_t const * ptr); // VLD1.64 {d0}, [r0]
int8x8_t    vld1_s8(__transfersize(8) int8_t const * ptr);    // VLD1.8 {d0}, [r0]
int16x4_t   vld1_s16(__transfersize(4) int16_t const * ptr);  // VLD1.16 {d0}, [r0]
int32x2_t   vld1_s32(__transfersize(2) int32_t const * ptr);  // VLD1.32 {d0}, [r0]
int64x1_t   vld1_s64(__transfersize(1) int64_t const * ptr);  // VLD1.64 {d0}, [r0]
float16x4_t vld1_f16(__transfersize(4) __fp16 const * ptr);   // VLD1.16 {d0}, [r0]
float32x2_t vld1_f32(__transfersize(2) float32_t const * ptr);// VLD1.32 {d0}, [r0]
poly8x8_t   vld1_p8(__transfersize(8) poly8_t const * ptr);   // VLD1.8 {d0}, [r0]
poly16x4_t  vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0]

Load a single lane from memory

uint8x16_t  vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec,
              __constrange(0,15) int lane);                   // VLD1.8 {d0[0]}, [r0]

uint16x8_t  vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.16 {d0[0]}, [r0]

uint32x4_t  vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.32 {d0[0]}, [r0]

uint64x2_t  vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, 
              __constrange(0,1) int lane);                    // VLD1.64 {d0}, [r0]

int8x16_t   vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, 
              __constrange(0,15) int lane);                   // VLD1.8 {d0[0]}, [r0]

int16x8_t   vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec,  
              __constrange(0,7) int lane);                    // VLD1.16 {d0[0]}, [r0]

int32x4_t   vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec,  
              __constrange(0,3) int lane);                    // VLD1.32 {d0[0]}, [r0]

float16x8_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.16 {d0[0]}, [r0]

float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.32 {d0[0]}, [r0]

int64x2_t   vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, 
              __constrange(0,1) int lane);                    // VLD1.64 {d0}, [r0]

poly8x16_t  vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, 
              __constrange(0,15) int lane);                   // VLD1.8 {d0[0]}, [r0]

poly16x8_t  vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.16 {d0[0]}, [r0]

uint8x8_t   vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.8 {d0[0]}, [r0]

uint16x4_t  vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.16 {d0[0]}, [r0]

uint32x2_t  vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, 
              __constrange(0,1) int lane);                    // VLD1.32 {d0[0]}, [r0]

uint64x1_t  vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, 
              __constrange(0,0) int lane);                    // VLD1.64 {d0}, [r0]

int8x8_t    vld1_lane_s8(__transfersize(1) int8_t const * ptr, int8x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.8 {d0[0]}, [r0]

int16x4_t   vld1_lane_s16(__transfersize(1) int16_t const * ptr, int16x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.16 {d0[0]}, [r0]

int32x2_t   vld1_lane_s32(__transfersize(1) int32_t const * ptr, int32x2_t vec, 
              __constrange(0,1) int lane);                    // VLD1.32 {d0[0]}, [r0]

float16x4_t vld1_lane_f16(__transfersize(1) __fp16 const * ptr, float16x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.16 {d0[0]}, [r0]

float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, 
              __constrange(0,1) int lane);                    // VLD1.32 {d0[0]}, [r0]

int64x1_t   vld1_lane_s64(__transfersize(1) int64_t const * ptr, int64x1_t vec, 
              __constrange(0,0) int lane);                    // VLD1.64 {d0}, [r0]

poly8x8_t   vld1_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x8_t vec, 
              __constrange(0,7) int lane);                    // VLD1.8 {d0[0]}, [r0]

poly16x4_t  vld1_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x4_t vec, 
              __constrange(0,3) int lane);                    // VLD1.16 {d0[0]}, [r0]

Load all lanes of vector with same value from memory

uint8x16_t  vld1q_dup_u8(__transfersize(1) uint8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
uint16x8_t  vld1q_dup_u16(__transfersize(1) uint16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
uint32x4_t  vld1q_dup_u32(__transfersize(1) uint32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
uint64x2_t  vld1q_dup_u64(__transfersize(1) uint64_t const * ptr); 
                                                             // VLD1.64 {d0}, [r0]
int8x16_t   vld1q_dup_s8(__transfersize(1) int8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
int16x8_t   vld1q_dup_s16(__transfersize(1) int16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
int32x4_t   vld1q_dup_s32(__transfersize(1) int32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
int64x2_t   vld1q_dup_s64(__transfersize(1) int64_t const * ptr); 
                                                             // VLD1.64 {d0}, [r0]
float16x8_t vld1q_dup_f16(__transfersize(1) __fp16 const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
float32x4_t vld1q_dup_f32(__transfersize(1) float32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
poly8x16_t  vld1q_dup_p8(__transfersize(1) poly8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
poly16x8_t  vld1q_dup_p16(__transfersize(1) poly16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
uint8x8_t   vld1_dup_u8(__transfersize(1) uint8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
uint16x4_t  vld1_dup_u16(__transfersize(1) uint16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
uint32x2_t  vld1_dup_u32(__transfersize(1) uint32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
uint64x1_t  vld1_dup_u64(__transfersize(1) uint64_t const * ptr); 
                                                             // VLD1.64 {d0}, [r0]
int8x8_t    vld1_dup_s8(__transfersize(1) int8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
int16x4_t   vld1_dup_s16(__transfersize(1) int16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
int32x2_t   vld1_dup_s32(__transfersize(1) int32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
int64x1_t   vld1_dup_s64(__transfersize(1) int64_t const * ptr); 
                                                             // VLD1.64 {d0}, [r0]
float16x4_t vld1_dup_f16(__transfersize(1) __fp16 const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
float32x2_t vld1_dup_f32(__transfersize(1) float32_t const * ptr); 
                                                             // VLD1.32 {d0[]}, [r0]
poly8x8_t   vld1_dup_p8(__transfersize(1) poly8_t const * ptr); 
                                                             // VLD1.8 {d0[]}, [r0]
poly16x4_t  vld1_dup_p16(__transfersize(1) poly16_t const * ptr); 
                                                             // VLD1.16 {d0[]}, [r0]
Non-Confidential | PDF version | ARM DUI0472K
Copyright © 2010-2014 ARM. All rights reserved.