avx512
AVX-512 Day 25

vpscatterdd

More than 3 years have passed since last update.

scatter も実装されました。

レジスタに入っている16個のインデクスにスケール(下の例では要素サイズの4バイト)を掛け、配列先頭アドレスに加算します。こうして得られた16個のアドレスに、入力レジスタの対応する要素をそれぞれストアします。

#include <immintrin.h>
#include <stdio.h>
#include <stdlib.h>

/* Number of elements in the scatter destination array. */
#define N 4096

/* Source values: one 32-bit value per lane of a 512-bit register. */
int in0[16] = {
    100, 101, 102, 103, 104, 105, 106, 107,
    108, 109, 110, 111, 112, 113, 114, 115
};

/* Destination element indices, one per lane; filled in at run time. */
unsigned int index0[16];

/* Scatter destination. As a file-scope object it starts out all zero. */
unsigned int out[N];

/*
 * Demonstrates the AVX-512 32-bit scatter store (vpscatterdd).
 *
 * Fills index0[] with 16 pseudo-random element indices in [0, N), scatters
 * the 16 values of in0[] to out[index0[i]], then prints the index/value
 * pairs followed by every non-zero element of out[].
 *
 * Returns 0.
 */
int
main(void)
{
    /*
     * rand() is never seeded, so the same index sequence is produced on
     * every run. Indices may repeat; in that case fewer than 16 elements
     * of out[] end up non-zero.
     */
    for (int i = 0; i < 16; i++) {
        index0[i] = (unsigned int)(rand() % N);
    }

    /* Unaligned loads: neither array is declared with 64-byte alignment. */
    __m512i in = _mm512_loadu_si512(in0);
    __m512i index = _mm512_loadu_si512(index0);

    /*
     * Scale 4: each index is multiplied by 4 bytes before being added to
     * the base address, i.e. the indices count 32-bit elements of out[].
     */
    _mm512_i32scatter_epi32(out, index, in, 4);

    /* index0[] is unsigned int, so it is printed with %u rather than %d. */
    for (int i = 0; i < 16; i++) {
        printf("index[%2d] = %4u, val[%2d] = %4d\n", i, index0[i], i, in0[i]);
    }

    /*
     * out[] starts zeroed and every value in in0[] is non-zero, so the
     * non-zero elements are exactly the ones the scatter wrote.
     */
    for (int i = 0; i < N; i++) {
        if (out[i] != 0) {
            printf("out[%d] = %u\n", i, out[i]);
        }
    }

    return 0;
}

index[ 0] = 1383, val[ 0] =  100
index[ 1] =  966, val[ 1] =  101
index[ 2] = 2153, val[ 2] =  102
index[ 3] = 2163, val[ 3] =  103
index[ 4] = 3153, val[ 4] =  104
index[ 5] = 3327, val[ 5] =  105
index[ 6] = 1098, val[ 6] =  106
index[ 7] = 2284, val[ 7] =  107
index[ 8] = 3881, val[ 8] =  108
index[ 9] = 3277, val[ 9] =  109
index[10] = 2234, val[10] =  110
index[11] = 1963, val[11] =  111
index[12] =  498, val[12] =  112
index[13] = 3835, val[13] =  113
index[14] = 2531, val[14] =  114
index[15] =  326, val[15] =  115
out[326] = 115
out[498] = 112
out[966] = 101
out[1098] = 106
out[1383] = 100
out[1963] = 111
out[2153] = 102
out[2163] = 103
out[2234] = 110
out[2284] = 107
out[2531] = 114
out[3153] = 104
out[3277] = 109
out[3327] = 105
out[3835] = 113
out[3881] = 108

以上です。