vfpclasspd

avx512

Posted at 2014-12-15 / Last updated at 2014-12-16

fp の値を分類します。

QNAN
+0
-0
+inf
-inf
denormal
negative finite (負の有限値。負の非正規化数を含み、-0 と -inf は含まない)
SNAN

の分類ができます。組み合わせて使えます。例えば +0 or -0 という判定ができます。

# include <immintrin.h>
# include <stdio.h>
# include <math.h>

/*
 * Eight sample doubles, one per lane of a 512-bit vector, chosen so that
 * several different floating-point categories appear in the input.
 */
double in[8] = {
    [0] = NAN,          /* NaN */
    [1] = 1.0,          /* ordinary positive number */
    [2] = INFINITY,     /* positive infinity */
    [3] = 1.0,          /* ordinary positive number */

    [4] = 1e-308,       /* smaller than DBL_MIN, i.e. a denormal */
    [5] = 2.0,          /* ordinary positive number */
    [6] = 0.0,          /* positive zero */
    [7] = -0.0,         /* negative zero */
};

/*
 * Write the eight bits of mask `a` to stdout as '0'/'1' characters,
 * starting with bit 0 (least significant) and ending with bit 7,
 * then finish the line with a newline.
 */
void
dump_bits8(__mmask8 a)
{
    int bit = 0;

    while (bit < 8) {
        putchar(((a >> bit) & 1) ? '1' : '0');
        bit++;
    }
    putchar('\n');
}

/*
 * FPCLASS_PD(cond, v, imm) - run vfpclasspd on a packed-double vector.
 *
 *   cond  lvalue that receives the resulting mask; it is bound to an
 *         AVX-512 mask register through the "Yk" constraint.
 *   v     vector operand to classify, bound to a vector register ("v").
 *   imm   category selector (bitwise OR of CLASS_* values). It is passed
 *         with the "i" constraint, so it must be a compile-time constant.
 *
 * The instruction is written in AT&T operand order: immediate, source,
 * destination.
 *
 * The expansion is wrapped in do { } while (0) and carries no trailing
 * semicolon, so the macro behaves as a single statement and is safe in
 * unbraced if/else bodies; callers write the terminating ';' themselves.
 */
# define FPCLASS_PD(cond,v,imm)                                          \
    do {                                                                \
        __asm__ __volatile__ ("vfpclasspd %[cat], %[src], %[dest]\n\t"  \
                              :[dest]"=Yk"(cond)                        \
                              :[cat]"i"(imm),                           \
                               [src]"v"(v)                              \
                              :);                                       \
    } while (0)

/*
 * Category selector bits for the vfpclasspd immediate operand.
 * OR several of them together to test for any of the selected categories.
 */
# define CLASS_QNAN      0x01   /* bit 0: quiet NaN */
# define CLASS_POS_ZERO  0x02   /* bit 1: +0 */
# define CLASS_NEG_ZERO  0x04   /* bit 2: -0 */
# define CLASS_POS_INF   0x08   /* bit 3: +infinity */
# define CLASS_NEG_INF   0x10   /* bit 4: -infinity */
# define CLASS_DENORMAL  0x20   /* bit 5: denormal */
# define CLASS_NEGATIVE  0x40   /* bit 6: negative finite value */
# define CLASS_SNAN      0x80   /* bit 7: signaling NaN */

/*
 * Demo driver: loads the eight doubles of `in` into one 512-bit vector,
 * prints them, classifies the vector with FPCLASS_PD using three
 * different category selections, and prints each resulting mask.
 */
int
main()
{
    /*
     * Selection used for the "!normal" line: both NaN kinds, both zeros,
     * both infinities and denormals. CLASS_NEGATIVE is deliberately not
     * part of it.
     */
    enum {
        NOT_NORMAL = CLASS_QNAN | CLASS_POS_ZERO | CLASS_NEG_ZERO
                   | CLASS_POS_INF | CLASS_NEG_INF | CLASS_DENORMAL
                   | CLASS_SNAN
    };

    __m512d vec = _mm512_loadu_pd(in);

    /* Echo the inputs on a single line; no newline is written here. */
    for (const double *p = in; p != in + 8; p++) {
        printf("%.1e, ", *p);
    }

    __mmask8 is_qnan;
    __mmask8 is_pos_zero;
    __mmask8 is_not_normal;

    FPCLASS_PD(is_qnan, vec, CLASS_QNAN);
    FPCLASS_PD(is_pos_zero, vec, CLASS_POS_ZERO);
    FPCLASS_PD(is_not_normal, vec, NOT_NORMAL);

    /* Raw hex value of the QNaN mask; its newline also ends the input line. */
    printf("%x\n", is_qnan);

    printf("qnan     : ");
    dump_bits8(is_qnan);

    printf("pos_zero : ");
    dump_bits8(is_pos_zero);

    printf("!normal  : ");
    dump_bits8(is_not_normal);
}

nan, 1.0e+00, inf, 1.0e+00, 1.0e-308, 2.0e+00, 0.0e+00, -0.0e+00, 1
qnan     : 10000000
pos_zero : 00000010
!normal  : 10101011

(+0 は fpclassify では normal と判定されないと今知った)

vfpclasspd は AVX512-DQ という、また別の拡張に含まれていて(いったいいくつ拡張あるんだよ)、sde の挙動を見ると、knl では動かず、skx では動くという状態のようです。

明日は @tanakmura が ' ' について書きます。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up