## 18.19 N 要素構造体のロードの NEON 組み込み関数

また、配列構造体も同様に定義されています。例えば、`int16x4x2_t` 構造体は次のように定義されます。
```c
struct int16x4x2_t
{
int16x4_t val[2];
};
```

## メモリからの N 要素構造体のロード

```c
uint8x16x2_t  vld2q_u8(__transfersize(32) uint8_t const * ptr);
// VLD2.8 {d0, d2}, [r0]
uint16x8x2_t  vld2q_u16(__transfersize(16) uint16_t const * ptr);
// VLD2.16 {d0, d2}, [r0]
uint32x4x2_t  vld2q_u32(__transfersize(8) uint32_t const * ptr);
// VLD2.32 {d0, d2}, [r0]
int8x16x2_t vld2q_s8(__transfersize(32) int8_t const * ptr);
// VLD2.8 {d0, d2}, [r0]
int16x8x2_t vld2q_s16(__transfersize(16) int16_t const * ptr);
// VLD2.16 {d0, d2}, [r0]
int32x4x2_t vld2q_s32(__transfersize(8) int32_t const * ptr);
// VLD2.32 {d0, d2}, [r0]
float16x8x2_t vld2q_f16(__transfersize(16) __fp16 const * ptr);
// VLD2.16 {d0, d2}, [r0]
float32x4x2_t vld2q_f32(__transfersize(8) float32_t const * ptr);
// VLD2.32 {d0, d2}, [r0]
poly8x16x2_t  vld2q_p8(__transfersize(32) poly8_t const * ptr);
// VLD2.8 {d0, d2}, [r0]
poly16x8x2_t  vld2q_p16(__transfersize(16) poly16_t const * ptr);
// VLD2.16 {d0, d2}, [r0]
uint8x8x2_t vld2_u8(__transfersize(16) uint8_t const * ptr);
// VLD2.8 {d0, d1}, [r0]
uint16x4x2_t  vld2_u16(__transfersize(8) uint16_t const * ptr);
// VLD2.16 {d0, d1}, [r0]
uint32x2x2_t  vld2_u32(__transfersize(4) uint32_t const * ptr);
// VLD2.32 {d0, d1}, [r0]
uint64x1x2_t  vld2_u64(__transfersize(2) uint64_t const * ptr);
// VLD1.64 {d0, d1}, [r0]
int8x8x2_t  vld2_s8(__transfersize(16) int8_t const * ptr);
// VLD2.8 {d0, d1}, [r0]
int16x4x2_t vld2_s16(__transfersize(8) int16_t const * ptr);
// VLD2.16 {d0, d1}, [r0]
int32x2x2_t vld2_s32(__transfersize(4) int32_t const * ptr);
// VLD2.32 {d0, d1}, [r0]
int64x1x2_t vld2_s64(__transfersize(2) int64_t const * ptr);
// VLD1.64 {d0, d1}, [r0]
float16x4x2_t vld2_f16(__transfersize(8) __fp16 const * ptr);
// VLD2.16 {d0, d1}, [r0]
float32x2x2_t vld2_f32(__transfersize(4) float32_t const * ptr);
// VLD2.32 {d0, d1}, [r0]
poly8x8x2_t vld2_p8(__transfersize(16) poly8_t const * ptr);
// VLD2.8 {d0, d1}, [r0]
poly16x4x2_t  vld2_p16(__transfersize(8) poly16_t const * ptr);
// VLD2.16 {d0, d1}, [r0]
uint8x16x3_t  vld3q_u8(__transfersize(48) uint8_t const * ptr);
// VLD3.8 {d0, d2, d4}, [r0]
uint16x8x3_t  vld3q_u16(__transfersize(24) uint16_t const * ptr);
// VLD3.16 {d0, d2, d4}, [r0]
uint32x4x3_t  vld3q_u32(__transfersize(12) uint32_t const * ptr);
// VLD3.32 {d0, d2, d4}, [r0]
int8x16x3_t vld3q_s8(__transfersize(48) int8_t const * ptr);
// VLD3.8 {d0, d2, d4}, [r0]
int16x8x3_t vld3q_s16(__transfersize(24) int16_t const * ptr);
// VLD3.16 {d0, d2, d4}, [r0]
int32x4x3_t vld3q_s32(__transfersize(12) int32_t const * ptr);
// VLD3.32 {d0, d2, d4}, [r0]
float16x8x3_t vld3q_f16(__transfersize(24) __fp16 const * ptr);
// VLD3.16 {d0, d2, d4}, [r0]
float32x4x3_t vld3q_f32(__transfersize(12) float32_t const * ptr);
// VLD3.32 {d0, d2, d4}, [r0]
poly8x16x3_t  vld3q_p8(__transfersize(48) poly8_t const * ptr);
// VLD3.8 {d0, d2, d4}, [r0]
poly16x8x3_t  vld3q_p16(__transfersize(24) poly16_t const * ptr);
// VLD3.16 {d0, d2, d4}, [r0]
uint8x8x3_t vld3_u8(__transfersize(24) uint8_t const * ptr);
// VLD3.8 {d0, d1, d2}, [r0]
uint16x4x3_t  vld3_u16(__transfersize(12) uint16_t const * ptr);
// VLD3.16 {d0, d1, d2}, [r0]
uint32x2x3_t  vld3_u32(__transfersize(6) uint32_t const * ptr);
// VLD3.32 {d0, d1, d2}, [r0]
uint64x1x3_t  vld3_u64(__transfersize(3) uint64_t const * ptr);
// VLD1.64 {d0, d1, d2}, [r0]
int8x8x3_t  vld3_s8(__transfersize(24) int8_t const * ptr);
// VLD3.8 {d0, d1, d2}, [r0]
int16x4x3_t vld3_s16(__transfersize(12) int16_t const * ptr);
// VLD3.16 {d0, d1, d2}, [r0]
int32x2x3_t vld3_s32(__transfersize(6) int32_t const * ptr);
// VLD3.32 {d0, d1, d2}, [r0]
int64x1x3_t vld3_s64(__transfersize(3) int64_t const * ptr);
// VLD1.64 {d0, d1, d2}, [r0]
float16x4x3_t vld3_f16(__transfersize(12) __fp16 const * ptr);
// VLD3.16 {d0, d1, d2}, [r0]
float32x2x3_t vld3_f32(__transfersize(6) float32_t const * ptr);
// VLD3.32 {d0, d1, d2}, [r0]
poly8x8x3_t vld3_p8(__transfersize(24) poly8_t const * ptr);
// VLD3.8 {d0, d1, d2}, [r0]
poly16x4x3_t  vld3_p16(__transfersize(12) poly16_t const * ptr);
// VLD3.16 {d0, d1, d2}, [r0]
uint8x16x4_t  vld4q_u8(__transfersize(64) uint8_t const * ptr);
// VLD4.8 {d0, d2, d4, d6}, [r0]
uint16x8x4_t  vld4q_u16(__transfersize(32) uint16_t const * ptr);
// VLD4.16 {d0, d2, d4, d6}, [r0]
uint32x4x4_t  vld4q_u32(__transfersize(16) uint32_t const * ptr);
// VLD4.32 {d0, d2, d4, d6}, [r0]
int8x16x4_t vld4q_s8(__transfersize(64) int8_t const * ptr);
// VLD4.8 {d0, d2, d4, d6}, [r0]
int16x8x4_t vld4q_s16(__transfersize(32) int16_t const * ptr);
// VLD4.16 {d0, d2, d4, d6}, [r0]
int32x4x4_t vld4q_s32(__transfersize(16) int32_t const * ptr);
// VLD4.32 {d0, d2, d4, d6}, [r0]
float16x8x4_t vld4q_f16(__transfersize(32) __fp16 const * ptr);
// VLD4.16 {d0, d2, d4, d6}, [r0]
float32x4x4_t vld4q_f32(__transfersize(16) float32_t const * ptr);
// VLD4.32 {d0, d2, d4, d6}, [r0]
poly8x16x4_t  vld4q_p8(__transfersize(64) poly8_t const * ptr);
// VLD4.8 {d0, d2, d4, d6}, [r0]
poly16x8x4_t  vld4q_p16(__transfersize(32) poly16_t const * ptr);
// VLD4.16 {d0, d2, d4, d6}, [r0]
uint8x8x4_t vld4_u8(__transfersize(32) uint8_t const * ptr);
// VLD4.8 {d0, d1, d2, d3}, [r0]
uint16x4x4_t  vld4_u16(__transfersize(16) uint16_t const * ptr);
// VLD4.16 {d0, d1, d2, d3}, [r0]
uint32x2x4_t  vld4_u32(__transfersize(8) uint32_t const * ptr);
// VLD4.32 {d0, d1, d2, d3}, [r0]
uint64x1x4_t  vld4_u64(__transfersize(4) uint64_t const * ptr);
// VLD1.64 {d0, d1, d2, d3}, [r0]
int8x8x4_t  vld4_s8(__transfersize(32) int8_t const * ptr);
// VLD4.8 {d0, d1, d2, d3}, [r0]
int16x4x4_t vld4_s16(__transfersize(16) int16_t const * ptr);
// VLD4.16 {d0, d1, d2, d3}, [r0]
int32x2x4_t vld4_s32(__transfersize(8) int32_t const * ptr);
// VLD4.32 {d0, d1, d2, d3}, [r0]
int64x1x4_t vld4_s64(__transfersize(4) int64_t const * ptr);
// VLD1.64 {d0, d1, d2, d3}, [r0]
float16x4x4_t vld4_f16(__transfersize(16) __fp16 const * ptr);
// VLD4.16 {d0, d1, d2, d3}, [r0]
float32x2x4_t vld4_f32(__transfersize(8) float32_t const * ptr);
// VLD4.32 {d0, d1, d2, d3}, [r0]
poly8x8x4_t vld4_p8(__transfersize(32) poly8_t const * ptr);
// VLD4.8 {d0, d1, d2, d3}, [r0]
poly16x4x4_t  vld4_p16(__transfersize(16) poly16_t const * ptr);
// VLD4.16 {d0, d1, d2, d3}, [r0]
```

## メモリからの同一の値をもつすべての N 要素構造体レーンのロード

```c
uint8x8x2_t vld2_dup_u8(__transfersize(2) uint8_t const * ptr);
// VLD2.8 {d0[], d1[]}, [r0]
uint16x4x2_t  vld2_dup_u16(__transfersize(2) uint16_t const * ptr);
// VLD2.16 {d0[], d1[]}, [r0]
uint32x2x2_t  vld2_dup_u32(__transfersize(2) uint32_t const * ptr);
// VLD2.32 {d0[], d1[]}, [r0]
uint64x1x2_t  vld2_dup_u64(__transfersize(2) uint64_t const * ptr);
// VLD1.64 {d0, d1}, [r0]
int8x8x2_t  vld2_dup_s8(__transfersize(2) int8_t const * ptr);
// VLD2.8 {d0[], d1[]}, [r0]
int16x4x2_t vld2_dup_s16(__transfersize(2) int16_t const * ptr);
// VLD2.16 {d0[], d1[]}, [r0]
int32x2x2_t vld2_dup_s32(__transfersize(2) int32_t const * ptr);
// VLD2.32 {d0[], d1[]}, [r0]
int64x1x2_t vld2_dup_s64(__transfersize(2) int64_t const * ptr);
// VLD1.64 {d0, d1}, [r0]
float16x4x2_t vld2_dup_f16(__transfersize(2) __fp16 const * ptr);
// VLD2.16 {d0[], d1[]}, [r0]
float32x2x2_t vld2_dup_f32(__transfersize(2) float32_t const * ptr);
// VLD2.32 {d0[], d1[]}, [r0]
poly8x8x2_t vld2_dup_p8(__transfersize(2) poly8_t const * ptr);
// VLD2.8 {d0[], d1[]}, [r0]
poly16x4x2_t  vld2_dup_p16(__transfersize(2) poly16_t const * ptr);
// VLD2.16 {d0[], d1[]}, [r0]
uint8x8x3_t vld3_dup_u8(__transfersize(3) uint8_t const * ptr);
// VLD3.8 {d0[], d1[], d2[]}, [r0]
uint16x4x3_t  vld3_dup_u16(__transfersize(3) uint16_t const * ptr);
// VLD3.16 {d0[], d1[], d2[]}, [r0]
uint32x2x3_t  vld3_dup_u32(__transfersize(3) uint32_t const * ptr);
// VLD3.32 {d0[], d1[], d2[]}, [r0]
uint64x1x3_t  vld3_dup_u64(__transfersize(3) uint64_t const * ptr);
// VLD1.64 {d0, d1, d2}, [r0]
int8x8x3_t  vld3_dup_s8(__transfersize(3) int8_t const * ptr);
// VLD3.8 {d0[], d1[], d2[]}, [r0]
int16x4x3_t vld3_dup_s16(__transfersize(3) int16_t const * ptr);
// VLD3.16 {d0[], d1[], d2[]}, [r0]
int32x2x3_t vld3_dup_s32(__transfersize(3) int32_t const * ptr);
// VLD3.32 {d0[], d1[], d2[]}, [r0]
int64x1x3_t vld3_dup_s64(__transfersize(3) int64_t const * ptr);
// VLD1.64 {d0, d1, d2}, [r0]
float16x4x3_t vld3_dup_f16(__transfersize(3) __fp16 const * ptr);
// VLD3.16 {d0[], d1[], d2[]}, [r0]
float32x2x3_t vld3_dup_f32(__transfersize(3) float32_t const * ptr);
// VLD3.32 {d0[], d1[], d2[]}, [r0]
poly8x8x3_t vld3_dup_p8(__transfersize(3) poly8_t const * ptr);
// VLD3.8 {d0[], d1[], d2[]}, [r0]
poly16x4x3_t  vld3_dup_p16(__transfersize(3) poly16_t const * ptr);
// VLD3.16 {d0[], d1[], d2[]}, [r0]
uint8x8x4_t vld4_dup_u8(__transfersize(4) uint8_t const * ptr);
// VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
uint16x4x4_t  vld4_dup_u16(__transfersize(4) uint16_t const * ptr);
// VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
uint32x2x4_t  vld4_dup_u32(__transfersize(4) uint32_t const * ptr);
// VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
uint64x1x4_t  vld4_dup_u64(__transfersize(4) uint64_t const * ptr);
// VLD1.64 {d0, d1, d2, d3}, [r0]
int8x8x4_t  vld4_dup_s8(__transfersize(4) int8_t const * ptr);
// VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
int16x4x4_t vld4_dup_s16(__transfersize(4) int16_t const * ptr);
// VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
int32x2x4_t vld4_dup_s32(__transfersize(4) int32_t const * ptr);
// VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
int64x1x4_t vld4_dup_s64(__transfersize(4) int64_t const * ptr);
// VLD1.64 {d0, d1, d2, d3}, [r0]
float16x4x4_t vld4_dup_f16(__transfersize(4) __fp16 const * ptr);
// VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
float32x2x4_t vld4_dup_f32(__transfersize(4) float32_t const * ptr);
// VLD4.32 {d0[], d1[], d2[], d3[]}, [r0]
poly8x8x4_t vld4_dup_p8(__transfersize(4) poly8_t const * ptr);
// VLD4.8 {d0[], d1[], d2[], d3[]}, [r0]
poly16x4x4_t  vld4_dup_p16(__transfersize(4) poly16_t const * ptr);
// VLD4.16 {d0[], d1[], d2[], d3[]}, [r0]
```

## メモリからの N 要素構造体の単一のレーンのロード

```c
uint16x8x2_t  vld2q_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x8x2_t src,
__constrange(0,7) int lane);     // VLD2.16 {d0[0], d2[0]}, [r0]

uint32x4x2_t  vld2q_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x4x2_t src,
__constrange(0,3) int lane);     // VLD2.32 {d0[0], d2[0]}, [r0]

int16x8x2_t vld2q_lane_s16(__transfersize(2) int16_t const * ptr, int16x8x2_t src,
__constrange(0,7) int lane);     // VLD2.16 {d0[0], d2[0]}, [r0]

int32x4x2_t vld2q_lane_s32(__transfersize(2) int32_t const * ptr, int32x4x2_t src,
__constrange(0,3) int lane);     // VLD2.32 {d0[0], d2[0]}, [r0]

float16x8x2_t vld2q_lane_f16(__transfersize(2) __fp16 const * ptr, float16x8x2_t src,
__constrange(0,7) int lane);     // VLD2.16 {d0[0], d2[0]}, [r0]

float32x4x2_t vld2q_lane_f32(__transfersize(2) float32_t const * ptr, float32x4x2_t
src, __constrange(0,3) int lane); // VLD2.32 {d0[0], d2[0]}, [r0]

poly16x8x2_t  vld2q_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x8x2_t src,
__constrange(0,7) int lane);     // VLD2.16 {d0[0], d2[0]}, [r0]

uint8x8x2_t vld2_lane_u8(__transfersize(2) uint8_t const * ptr, uint8x8x2_t src,
__constrange(0,7) int lane);     // VLD2.8 {d0[0], d1[0]}, [r0]

uint16x4x2_t  vld2_lane_u16(__transfersize(2) uint16_t const * ptr, uint16x4x2_t src,
__constrange(0,3) int lane);     // VLD2.16 {d0[0], d1[0]}, [r0]

uint32x2x2_t  vld2_lane_u32(__transfersize(2) uint32_t const * ptr, uint32x2x2_t src,
__constrange(0,1) int lane);     // VLD2.32 {d0[0], d1[0]}, [r0]

int8x8x2_t  vld2_lane_s8(__transfersize(2) int8_t const * ptr, int8x8x2_t src,
__constrange(0,7) int lane);     // VLD2.8 {d0[0], d1[0]}, [r0]

int16x4x2_t vld2_lane_s16(__transfersize(2) int16_t const * ptr, int16x4x2_t src,
__constrange(0,3) int lane);     // VLD2.16 {d0[0], d1[0]}, [r0]

int32x2x2_t vld2_lane_s32(__transfersize(2) int32_t const * ptr, int32x2x2_t src,
__constrange(0,1) int lane);     // VLD2.32 {d0[0], d1[0]}, [r0]

float16x4x2_t vld2_lane_f16(__transfersize(2) __fp16 const * ptr, float16x4x2_t src,
__constrange(0,3) int lane);     // VLD2.16 {d0[0], d1[0]}, [r0]

float32x2x2_t vld2_lane_f32(__transfersize(2) float32_t const * ptr, float32x2x2_t
src, __constrange(0,1) int lane); // VLD2.32 {d0[0], d1[0]}, [r0]

poly8x8x2_t vld2_lane_p8(__transfersize(2) poly8_t const * ptr, poly8x8x2_t src,
__constrange(0,7) int lane);     // VLD2.8 {d0[0], d1[0]}, [r0]

poly16x4x2_t  vld2_lane_p16(__transfersize(2) poly16_t const * ptr, poly16x4x2_t src,
__constrange(0,3) int lane);     // VLD2.16 {d0[0], d1[0]}, [r0]

uint16x8x3_t  vld3q_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x8x3_t src,
__constrange(0,7) int lane);     // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]

uint32x4x3_t  vld3q_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x4x3_t src,
__constrange(0,3) int lane);     // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]

int16x8x3_t vld3q_lane_s16(__transfersize(3) int16_t const * ptr, int16x8x3_t src,
__constrange(0,7) int lane);     // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]

int32x4x3_t vld3q_lane_s32(__transfersize(3) int32_t const * ptr, int32x4x3_t src,
__constrange(0,3) int lane);     // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]

float16x8x3_t vld3q_lane_f16(__transfersize(3) __fp16 const * ptr, float16x8x3_t src,
__constrange(0,7) int lane);     // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]

float32x4x3_t vld3q_lane_f32(__transfersize(3) float32_t const * ptr, float32x4x3_t
src, __constrange(0,3) int lane); // VLD3.32 {d0[0], d2[0], d4[0]}, [r0]

poly16x8x3_t  vld3q_lane_p16(__transfersize(3) poly16_t const * ptr, poly16x8x3_t src,
__constrange(0,7) int lane);     // VLD3.16 {d0[0], d2[0], d4[0]}, [r0]

uint8x8x3_t vld3_lane_u8(__transfersize(3) uint8_t const * ptr, uint8x8x3_t src,
__constrange(0,7) int lane);     // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]

uint16x4x3_t  vld3_lane_u16(__transfersize(3) uint16_t const * ptr, uint16x4x3_t src,
__constrange(0,3) int lane);     // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]

uint32x2x3_t  vld3_lane_u32(__transfersize(3) uint32_t const * ptr, uint32x2x3_t src,
__constrange(0,1) int lane);     // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]

int8x8x3_t  vld3_lane_s8(__transfersize(3) int8_t const * ptr, int8x8x3_t src,
__constrange(0,7) int lane);     // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]

int16x4x3_t vld3_lane_s16(__transfersize(3) int16_t const * ptr, int16x4x3_t src,
__constrange(0,3) int lane);     // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]

int32x2x3_t vld3_lane_s32(__transfersize(3) int32_t const * ptr, int32x2x3_t src,
__constrange(0,1) int lane);     // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]

float16x4x3_t vld3_lane_f16(__transfersize(3) __fp16 const * ptr, float16x4x3_t src,
__constrange(0,3) int lane);     // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]

float32x2x3_t vld3_lane_f32(__transfersize(3) float32_t const * ptr, float32x2x3_t
src, __constrange(0,1) int lane); // VLD3.32 {d0[0], d1[0], d2[0]}, [r0]

poly8x8x3_t vld3_lane_p8(__transfersize(3) poly8_t const * ptr, poly8x8x3_t src,
__constrange(0,7) int lane);     // VLD3.8 {d0[0], d1[0], d2[0]}, [r0]

poly16x4x3_t  vld3_lane_p16(__transfersize(3) poly16_t const * ptr, poly16x4x3_t src,
__constrange(0,3) int lane);     // VLD3.16 {d0[0], d1[0], d2[0]}, [r0]

uint16x8x4_t  vld4q_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x8x4_t src,
__constrange(0,7) int lane);      // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

uint32x4x4_t  vld4q_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x4x4_t src,
__constrange(0,3) int lane);      // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

int16x8x4_t vld4q_lane_s16(__transfersize(4) int16_t const * ptr, int16x8x4_t src,
__constrange(0,7) int lane);      // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

int32x4x4_t vld4q_lane_s32(__transfersize(4) int32_t const * ptr, int32x4x4_t src,
__constrange(0,3) int lane);      // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

float16x8x4_t vld4q_lane_f16(__transfersize(4) __fp16 const * ptr, float16x8x4_t src,
__constrange(0,7) int lane);      // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

float32x4x4_t vld4q_lane_f32(__transfersize(4) float32_t const * ptr, float32x4x4_t
src, __constrange(0,3) int lane); // VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

poly16x8x4_t  vld4q_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x8x4_t src,
__constrange(0,7) int lane);      // VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

uint8x8x4_t vld4_lane_u8(__transfersize(4) uint8_t const * ptr, uint8x8x4_t src,
__constrange(0,7) int lane);      // VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

uint16x4x4_t  vld4_lane_u16(__transfersize(4) uint16_t const * ptr, uint16x4x4_t src,
__constrange(0,3) int lane);      // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

uint32x2x4_t  vld4_lane_u32(__transfersize(4) uint32_t const * ptr, uint32x2x4_t src,
__constrange(0,1) int lane);      // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

int8x8x4_t  vld4_lane_s8(__transfersize(4) int8_t const * ptr, int8x8x4_t src,
__constrange(0,7) int lane);      // VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

int16x4x4_t vld4_lane_s16(__transfersize(4) int16_t const * ptr, int16x4x4_t src,
__constrange(0,3) int lane);      // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

int32x2x4_t vld4_lane_s32(__transfersize(4) int32_t const * ptr, int32x2x4_t src,
__constrange(0,1) int lane);      // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

float16x4x4_t vld4_lane_f16(__transfersize(4) __fp16 const * ptr, float16x4x4_t src,
__constrange(0,3) int lane);      // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

float32x2x4_t vld4_lane_f32(__transfersize(4) float32_t const * ptr, float32x2x4_t
src, __constrange(0,1) int lane); // VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

poly8x8x4_t vld4_lane_p8(__transfersize(4) poly8_t const * ptr, poly8x8x4_t src,
__constrange(0,7) int lane);      // VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

poly16x4x4_t  vld4_lane_p16(__transfersize(4) poly16_t const * ptr, poly16x4x4_t src,
__constrange(0,3) int lane);      // VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
```

## メモリへの N 要素構造体のストア

```c
void  vst2q_u8(__transfersize(32) uint8_t * ptr, uint8x16x2_t val);
// VST2.8 {d0, d2}, [r0]
void  vst2q_u16(__transfersize(16) uint16_t * ptr, uint16x8x2_t val);
// VST2.16 {d0, d2}, [r0]
void  vst2q_u32(__transfersize(8) uint32_t * ptr, uint32x4x2_t val);
// VST2.32 {d0, d2}, [r0]
void  vst2q_s8(__transfersize(32) int8_t * ptr, int8x16x2_t val);
// VST2.8 {d0, d2}, [r0]
void  vst2q_s16(__transfersize(16) int16_t * ptr, int16x8x2_t val);
// VST2.16 {d0, d2}, [r0]
void  vst2q_s32(__transfersize(8) int32_t * ptr, int32x4x2_t val);
// VST2.32 {d0, d2}, [r0]
void  vst2q_f16(__transfersize(16) __fp16 * ptr, float16x8x2_t val);
// VST2.16 {d0, d2}, [r0]
void  vst2q_f32(__transfersize(8) float32_t * ptr, float32x4x2_t val);
// VST2.32 {d0, d2}, [r0]
void  vst2q_p8(__transfersize(32) poly8_t * ptr, poly8x16x2_t val);
// VST2.8 {d0, d2}, [r0]
void  vst2q_p16(__transfersize(16) poly16_t * ptr, poly16x8x2_t val);
// VST2.16 {d0, d2}, [r0]
void  vst2_u8(__transfersize(16) uint8_t * ptr, uint8x8x2_t val);
// VST2.8 {d0, d1}, [r0]
void  vst2_u16(__transfersize(8) uint16_t * ptr, uint16x4x2_t val);
// VST2.16 {d0, d1}, [r0]
void  vst2_u32(__transfersize(4) uint32_t * ptr, uint32x2x2_t val);
// VST2.32 {d0, d1}, [r0]
void  vst2_u64(__transfersize(2) uint64_t * ptr, uint64x1x2_t val);
// VST1.64 {d0, d1}, [r0]
void  vst2_s8(__transfersize(16) int8_t * ptr, int8x8x2_t val);
// VST2.8 {d0, d1}, [r0]
void  vst2_s16(__transfersize(8) int16_t * ptr, int16x4x2_t val);
// VST2.16 {d0, d1}, [r0]
void  vst2_s32(__transfersize(4) int32_t * ptr, int32x2x2_t val);
// VST2.32 {d0, d1}, [r0]
void  vst2_s64(__transfersize(2) int64_t * ptr, int64x1x2_t val);
// VST1.64 {d0, d1}, [r0]
void  vst2_f16(__transfersize(8) __fp16 * ptr, float16x4x2_t val);
// VST2.16 {d0, d1}, [r0]
void  vst2_f32(__transfersize(4) float32_t * ptr, float32x2x2_t val);
// VST2.32 {d0, d1}, [r0]
void  vst2_p8(__transfersize(16) poly8_t * ptr, poly8x8x2_t val);
// VST2.8 {d0, d1}, [r0]
void  vst2_p16(__transfersize(8) poly16_t * ptr, poly16x4x2_t val);
// VST2.16 {d0, d1}, [r0]
void  vst3q_u8(__transfersize(48) uint8_t * ptr, uint8x16x3_t val);
// VST3.8 {d0, d2, d4}, [r0]
void  vst3q_u16(__transfersize(24) uint16_t * ptr, uint16x8x3_t val);
// VST3.16 {d0, d2, d4}, [r0]
void  vst3q_u32(__transfersize(12) uint32_t * ptr, uint32x4x3_t val);
// VST3.32 {d0, d2, d4}, [r0]
void  vst3q_s8(__transfersize(48) int8_t * ptr, int8x16x3_t val);
// VST3.8 {d0, d2, d4}, [r0]
void  vst3q_s16(__transfersize(24) int16_t * ptr, int16x8x3_t val);
// VST3.16 {d0, d2, d4}, [r0]
void  vst3q_s32(__transfersize(12) int32_t * ptr, int32x4x3_t val);
// VST3.32 {d0, d2, d4}, [r0]
void  vst3q_f16(__transfersize(24) __fp16 * ptr, float16x8x3_t val);
// VST3.16 {d0, d2, d4}, [r0]
void  vst3q_f32(__transfersize(12) float32_t * ptr, float32x4x3_t val);
// VST3.32 {d0, d2, d4}, [r0]
void  vst3q_p8(__transfersize(48) poly8_t * ptr, poly8x16x3_t val);
// VST3.8 {d0, d2, d4}, [r0]
void  vst3q_p16(__transfersize(24) poly16_t * ptr, poly16x8x3_t val);
// VST3.16 {d0, d2, d4}, [r0]
void  vst3_u8(__transfersize(24) uint8_t * ptr, uint8x8x3_t val);
// VST3.8 {d0, d1, d2}, [r0]
void  vst3_u16(__transfersize(12) uint16_t * ptr, uint16x4x3_t val);
// VST3.16 {d0, d1, d2}, [r0]
void  vst3_u32(__transfersize(6) uint32_t * ptr, uint32x2x3_t val);
// VST3.32 {d0, d1, d2}, [r0]
void  vst3_u64(__transfersize(3) uint64_t * ptr, uint64x1x3_t val);
// VST1.64 {d0, d1, d2}, [r0]
void  vst3_s8(__transfersize(24) int8_t * ptr, int8x8x3_t val);
// VST3.8 {d0, d1, d2}, [r0]
void  vst3_s16(__transfersize(12) int16_t * ptr, int16x4x3_t val);
// VST3.16 {d0, d1, d2}, [r0]
void  vst3_s32(__transfersize(6) int32_t * ptr, int32x2x3_t val);
// VST3.32 {d0, d1, d2}, [r0]
void  vst3_s64(__transfersize(3) int64_t * ptr, int64x1x3_t val);
// VST1.64 {d0, d1, d2}, [r0]
void  vst3_f16(__transfersize(12) __fp16 * ptr, float16x4x3_t val);
// VST3.16 {d0, d1, d2}, [r0]
void  vst3_f32(__transfersize(6) float32_t * ptr, float32x2x3_t val);
// VST3.32 {d0, d1, d2}, [r0]
void  vst3_p8(__transfersize(24) poly8_t * ptr, poly8x8x3_t val);
// VST3.8 {d0, d1, d2}, [r0]
void  vst3_p16(__transfersize(12) poly16_t * ptr, poly16x4x3_t val);
// VST3.16 {d0, d1, d2}, [r0]
void  vst4q_u8(__transfersize(64) uint8_t * ptr, uint8x16x4_t val);
// VST4.8 {d0, d2, d4, d6}, [r0]
void  vst4q_u16(__transfersize(32) uint16_t * ptr, uint16x8x4_t val);
// VST4.16 {d0, d2, d4, d6}, [r0]
void  vst4q_u32(__transfersize(16) uint32_t * ptr, uint32x4x4_t val);
// VST4.32 {d0, d2, d4, d6}, [r0]
void  vst4q_s8(__transfersize(64) int8_t * ptr, int8x16x4_t val);
// VST4.8 {d0, d2, d4, d6}, [r0]
void  vst4q_s16(__transfersize(32) int16_t * ptr, int16x8x4_t val);
// VST4.16 {d0, d2, d4, d6}, [r0]
void  vst4q_s32(__transfersize(16) int32_t * ptr, int32x4x4_t val);
// VST4.32 {d0, d2, d4, d6}, [r0]
void  vst4q_f16(__transfersize(32) __fp16 * ptr, float16x8x4_t val);
// VST4.16 {d0, d2, d4, d6}, [r0]
void  vst4q_f32(__transfersize(16) float32_t * ptr, float32x4x4_t val);
// VST4.32 {d0, d2, d4, d6}, [r0]
void  vst4q_p8(__transfersize(64) poly8_t * ptr, poly8x16x4_t val);
// VST4.8 {d0, d2, d4, d6}, [r0]
void  vst4q_p16(__transfersize(32) poly16_t * ptr, poly16x8x4_t val);
// VST4.16 {d0, d2, d4, d6}, [r0]
void  vst4_u8(__transfersize(32) uint8_t * ptr, uint8x8x4_t val);
// VST4.8 {d0, d1, d2, d3}, [r0]
void  vst4_u16(__transfersize(16) uint16_t * ptr, uint16x4x4_t val);
// VST4.16 {d0, d1, d2, d3}, [r0]
void  vst4_u32(__transfersize(8) uint32_t * ptr, uint32x2x4_t val);
// VST4.32 {d0, d1, d2, d3}, [r0]
void  vst4_u64(__transfersize(4) uint64_t * ptr, uint64x1x4_t val);
// VST1.64 {d0, d1, d2, d3}, [r0]
void  vst4_s8(__transfersize(32) int8_t * ptr, int8x8x4_t val);
// VST4.8 {d0, d1, d2, d3}, [r0]
void  vst4_s16(__transfersize(16) int16_t * ptr, int16x4x4_t val);
// VST4.16 {d0, d1, d2, d3}, [r0]
void  vst4_s32(__transfersize(8) int32_t * ptr, int32x2x4_t val);
// VST4.32 {d0, d1, d2, d3}, [r0]
void  vst4_s64(__transfersize(4) int64_t * ptr, int64x1x4_t val);
// VST1.64 {d0, d1, d2, d3}, [r0]
void  vst4_f16(__transfersize(16) __fp16 * ptr, float16x4x4_t val);
// VST4.16 {d0, d1, d2, d3}, [r0]
void  vst4_f32(__transfersize(8) float32_t * ptr, float32x2x4_t val);
// VST4.32 {d0, d1, d2, d3}, [r0]
void  vst4_p8(__transfersize(32) poly8_t * ptr, poly8x8x4_t val);
// VST4.8 {d0, d1, d2, d3}, [r0]
void  vst4_p16(__transfersize(16) poly16_t * ptr, poly16x4x4_t val);
// VST4.16 {d0, d1, d2, d3}, [r0]
```

## メモリへの N 要素構造体の単一のレーンのストア

```c
void  vst2q_lane_u16(__transfersize(2) uint16_t * ptr, uint16x8x2_t val,
__constrange(0,7) int lane);             // VST2.16 {d0[0], d2[0]}, [r0]

void  vst2q_lane_u32(__transfersize(2) uint32_t * ptr, uint32x4x2_t val,
__constrange(0,3) int lane);             // VST2.32 {d0[0], d2[0]}, [r0]

void  vst2q_lane_s16(__transfersize(2) int16_t * ptr, int16x8x2_t val,
__constrange(0,7) int lane);             // VST2.16 {d0[0], d2[0]}, [r0]

void  vst2q_lane_s32(__transfersize(2) int32_t * ptr, int32x4x2_t val,
__constrange(0,3) int lane);             // VST2.32 {d0[0], d2[0]}, [r0]

void  vst2q_lane_f16(__transfersize(2) __fp16 * ptr, float16x8x2_t val,
__constrange(0,7) int lane);             // VST2.16 {d0[0], d2[0]}, [r0]

void  vst2q_lane_f32(__transfersize(2) float32_t * ptr, float32x4x2_t val,
__constrange(0,3) int lane);             // VST2.32 {d0[0], d2[0]}, [r0]

void  vst2q_lane_p16(__transfersize(2) poly16_t * ptr, poly16x8x2_t val,
__constrange(0,7) int lane);             // VST2.16 {d0[0], d2[0]}, [r0]

void  vst2_lane_u8(__transfersize(2) uint8_t * ptr, uint8x8x2_t val,
__constrange(0,7) int lane);             // VST2.8 {d0[0], d1[0]}, [r0]

void  vst2_lane_u16(__transfersize(2) uint16_t * ptr, uint16x4x2_t val,
__constrange(0,3) int lane);             // VST2.16 {d0[0], d1[0]}, [r0]

void  vst2_lane_u32(__transfersize(2) uint32_t * ptr, uint32x2x2_t val,
__constrange(0,1) int lane);             // VST2.32 {d0[0], d1[0]}, [r0]

void  vst2_lane_s8(__transfersize(2) int8_t * ptr, int8x8x2_t val,
__constrange(0,7) int lane);             // VST2.8 {d0[0], d1[0]}, [r0]

void  vst2_lane_s16(__transfersize(2) int16_t * ptr, int16x4x2_t val,
__constrange(0,3) int lane);             // VST2.16 {d0[0], d1[0]}, [r0]

void  vst2_lane_s32(__transfersize(2) int32_t * ptr, int32x2x2_t val,
__constrange(0,1) int lane);             // VST2.32 {d0[0], d1[0]}, [r0]

void  vst2_lane_f16(__transfersize(2) __fp16 * ptr, float16x4x2_t val,
__constrange(0,3) int lane);             // VST2.16 {d0[0], d1[0]}, [r0]

void  vst2_lane_f32(__transfersize(2) float32_t * ptr, float32x2x2_t val,
__constrange(0,1) int lane);             // VST2.32 {d0[0], d1[0]}, [r0]

void  vst2_lane_p8(__transfersize(2) poly8_t * ptr, poly8x8x2_t val,
__constrange(0,7) int lane);             // VST2.8 {d0[0], d1[0]}, [r0]

void  vst2_lane_p16(__transfersize(2) poly16_t * ptr, poly16x4x2_t val,
__constrange(0,3) int lane);             // VST2.16 {d0[0], d1[0]}, [r0]

void  vst3q_lane_u16(__transfersize(3) uint16_t * ptr, uint16x8x3_t val,
__constrange(0,7) int lane);             // VST3.16 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_u32(__transfersize(3) uint32_t * ptr, uint32x4x3_t val,
__constrange(0,3) int lane);             // VST3.32 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_s16(__transfersize(3) int16_t * ptr, int16x8x3_t val,
__constrange(0,7) int lane);             // VST3.16 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_s32(__transfersize(3) int32_t * ptr, int32x4x3_t val,
__constrange(0,3) int lane);             // VST3.32 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_f16(__transfersize(3) __fp16 * ptr, float16x8x3_t val,
__constrange(0,7) int lane);             // VST3.16 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_f32(__transfersize(3) float32_t * ptr, float32x4x3_t val,
__constrange(0,3) int lane);             // VST3.32 {d0[0], d2[0], d4[0]}, [r0]

void  vst3q_lane_p16(__transfersize(3) poly16_t * ptr, poly16x8x3_t val,
__constrange(0,7) int lane);             // VST3.16 {d0[0], d2[0], d4[0]}, [r0]

void  vst3_lane_u8(__transfersize(3) uint8_t * ptr, uint8x8x3_t val,
__constrange(0,7) int lane);             // VST3.8 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_u16(__transfersize(3) uint16_t * ptr, uint16x4x3_t val,
__constrange(0,3) int lane);             // VST3.16 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_u32(__transfersize(3) uint32_t * ptr, uint32x2x3_t val,
__constrange(0,1) int lane);             // VST3.32 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_s8(__transfersize(3) int8_t * ptr, int8x8x3_t val,
__constrange(0,7) int lane);             // VST3.8 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_s16(__transfersize(3) int16_t * ptr, int16x4x3_t val,
__constrange(0,3) int lane);             // VST3.16 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_s32(__transfersize(3) int32_t * ptr, int32x2x3_t val,
__constrange(0,1) int lane);             // VST3.32 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_f16(__transfersize(3) __fp16 * ptr, float16x4x3_t val,
__constrange(0,3) int lane);             // VST3.16 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_f32(__transfersize(3) float32_t * ptr, float32x2x3_t val,
__constrange(0,1) int lane);             // VST3.32 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_p8(__transfersize(3) poly8_t * ptr, poly8x8x3_t val,
__constrange(0,7) int lane);             // VST3.8 {d0[0], d1[0], d2[0]}, [r0]

void  vst3_lane_p16(__transfersize(3) poly16_t * ptr, poly16x4x3_t val,
__constrange(0,3) int lane);             // VST3.16 {d0[0], d1[0], d2[0]}, [r0]

void  vst4q_lane_u16(__transfersize(4) uint16_t * ptr, uint16x8x4_t val,
__constrange(0,7) int lane);     // VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_u32(__transfersize(4) uint32_t * ptr, uint32x4x4_t val,
__constrange(0,3) int lane);     // VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_s16(__transfersize(4) int16_t * ptr, int16x8x4_t val,
__constrange(0,7) int lane);     // VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_s32(__transfersize(4) int32_t * ptr, int32x4x4_t val,
__constrange(0,3) int lane);     // VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_f16(__transfersize(4) __fp16 * ptr, float16x8x4_t val,
__constrange(0,7) int lane);     // VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_f32(__transfersize(4) float32_t * ptr, float32x4x4_t val,
__constrange(0,3) int lane);     // VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4q_lane_p16(__transfersize(4) poly16_t * ptr, poly16x8x4_t val,
__constrange(0,7) int lane);     // VST4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0]

void  vst4_lane_u8(__transfersize(4) uint8_t * ptr, uint8x8x4_t val,
__constrange(0,7) int lane);     // VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_u16(__transfersize(4) uint16_t * ptr, uint16x4x4_t val,
__constrange(0,3) int lane);     // VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_u32(__transfersize(4) uint32_t * ptr, uint32x2x4_t val,
__constrange(0,1) int lane);     // VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_s8(__transfersize(4) int8_t * ptr, int8x8x4_t val,
__constrange(0,7) int lane);     // VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_s16(__transfersize(4) int16_t * ptr, int16x4x4_t val,
__constrange(0,3) int lane);     // VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_s32(__transfersize(4) int32_t * ptr, int32x2x4_t val,
__constrange(0,1) int lane);     // VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_f16(__transfersize(4) __fp16 * ptr, float16x4x4_t val,
__constrange(0,3) int lane);     // VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_f32(__transfersize(4) float32_t * ptr, float32x2x4_t val,
__constrange(0,1) int lane);     // VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_p8(__transfersize(4) poly8_t * ptr, poly8x8x4_t val,
__constrange(0,7) int lane);     // VST4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]

void  vst4_lane_p16(__transfersize(4) poly16_t * ptr, poly16x4x4_t val,
__constrange(0,3) int lane);     // VST4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]
```