概要
paizaで、インラインアセンブラやってみた。
練習問題やってみた。
練習問題
x87 FPUの命令(fsin)をインラインアセンブラから呼び出せ。
参考にしたページ
サンプルコード
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <math.h>
/* Number of equal steps the sampled range is split into; the loops in
 * main() run from 0 to this value inclusive, i.e. NUM_OF_ARRAY + 1 points. */
#define NUM_OF_ARRAY (32)
/* <math.h> already provides M_PI in the default GNU/POSIX modes; define it
 * only when it is missing (e.g. strict -std=c11) to avoid a macro
 * redefinition. Both spellings round to the same double. */
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif
/*
 * Computes sin(a) with the x87 FSIN instruction.
 *
 * The argument is loaded onto the x87 stack from memory, FSIN replaces the
 * stack top with its sine, and the result is popped into r. The push/pop
 * pair leaves the x87 register stack balanced.
 *
 * Operand directions matter: r is the (write-only) output and a is the
 * input. Declaring them the other way round hides the store to r from the
 * compiler and feeds an uninitialised value in as an input. No register is
 * hard-coded, so the code does not depend on where the calling convention
 * happened to place the argument.
 *
 * NOTE: per the Intel SDM, FSIN only handles |a| < 2^63; outside that range
 * the operand is left unchanged. Callers needing huge arguments should use
 * sin() from <math.h> instead.
 *
 * Kept noinline so the benchmark in main() measures a real call.
 */
double __attribute__ ((noinline)) fsin(double a) {
    double r;
    __asm__ volatile (
        "fldl %1\n\t"   /* push a onto the x87 stack */
        "fsin\n\t"      /* st(0) = sin(st(0)) */
        "fstpl %0"      /* pop st(0) into r */
        : "=m"(r)
        : "m"(a)
    );
    return r;
}
/*
 * Executes the RDTSCP instruction and returns the 64-bit counter value.
 *
 * The instruction delivers the low half in EAX and the high half in EDX;
 * the value it places in ECX is captured only so the compiler knows the
 * register is overwritten, and is otherwise discarded.
 */
static inline uint64_t RDTSCP() {
    uint32_t eax_low;
    uint32_t edx_high;
    uint32_t ecx_aux;
    uint64_t ticks;

    __asm__ volatile("rdtscp" : "=a" (eax_low), "=d" (edx_high), "=c" (ecx_aux));

    /* Widen before shifting so the high half is not lost. */
    ticks = edx_high;
    ticks <<= 32;
    ticks |= eax_low;
    return ticks;
}
/* Sample buffers shared by the loops in main(). Each holds NUM_OF_ARRAY + 1
 * entries because the loops index from 0 to NUM_OF_ARRAY inclusive. */
static double bufx[NUM_OF_ARRAY + 1]; /* input x values */
static double bufs[NUM_OF_ARRAY + 1]; /* results of fsin(x) */
static double bufc[NUM_OF_ARRAY + 1]; /* results of sin(x) from <math.h> */
/*
 * Compares the inline-assembly fsin() against sin() from <math.h>.
 *
 * Fills bufx with NUM_OF_ARRAY + 1 evenly spaced points covering
 * [-M_PI, +M_PI], evaluates both functions over those points while
 * bracketing each loop with RDTSCP(), then prints a table of
 * x / fsin(x) / sin(x) followed by the average counter ticks per call.
 *
 * argc and argv are unused. Always returns 0.
 */
int main(int argc, char * argv[]) {
    (void) argc;
    (void) argv;

    const int count = NUM_OF_ARRAY;
    /* The loops below run 0..count inclusive, so this many calls are timed. */
    const int samples = count + 1;
    const double xst = -1.0 * M_PI;
    const double len = 2.0 * M_PI;
    uint64_t start, end, diffs, diffc;
    int i;

    for (i = 0; i <= count; i++) {
        double w = len * (double) i / count;
        bufx[i] = xst + w;
    }

    start = RDTSCP();
    for (i = 0; i <= count; i++) {
        bufs[i] = fsin(bufx[i]);
    }
    end = RDTSCP();
    diffs = end - start;

    start = RDTSCP();
    for (i = 0; i <= count; i++) {
        bufc[i] = sin(bufx[i]);
    }
    end = RDTSCP();
    diffc = end - start;

    printf("x\t\t\tfsin(x)\t\t\tsin(x)\n");
    for (i = 0; i <= count; i++) {
        printf("%+.16e\t%+.16e\t%+.16e\n", bufx[i], bufs[i], bufc[i]);
    }

    /* Average over the number of calls actually made (samples), not
     * NUM_OF_ARRAY, which is one fewer. */
    printf("fsin tsc %" PRIu64 " sin tsc %" PRIu64 "\n",
           diffs / (uint64_t) samples, diffc / (uint64_t) samples);
    return 0;
}
実行結果
x fsin(x) sin(x)
-3.1415926535897931e+00 -1.2246063538223773e-16 -1.2246467991473532e-16
-2.9452431127404308e+00 -1.9509032201612861e-01 -1.9509032201612861e-01
-2.7488935718910690e+00 -3.8268343236508989e-01 -3.8268343236508989e-01
-2.5525440310417071e+00 -5.5557023301960218e-01 -5.5557023301960218e-01
-2.3561944901923448e+00 -7.0710678118654757e-01 -7.0710678118654757e-01
-2.1598449493429825e+00 -8.3146961230254546e-01 -8.3146961230254546e-01
-1.9634954084936207e+00 -9.2387953251128674e-01 -9.2387953251128674e-01
-1.7671458676442586e+00 -9.8078528040323043e-01 -9.8078528040323043e-01
-1.5707963267948966e+00 -1.0000000000000000e+00 -1.0000000000000000e+00
-1.3744467859455345e+00 -9.8078528040323043e-01 -9.8078528040323043e-01
-1.1780972450961724e+00 -9.2387953251128674e-01 -9.2387953251128674e-01
-9.8174770424681057e-01 -8.3146961230254535e-01 -8.3146961230254535e-01
-7.8539816339744828e-01 -7.0710678118654746e-01 -7.0710678118654746e-01
-5.8904862254808599e-01 -5.5557023301960207e-01 -5.5557023301960207e-01
-3.9269908169872414e-01 -3.8268343236508978e-01 -3.8268343236508978e-01
-1.9634954084936229e-01 -1.9509032201612847e-01 -1.9509032201612847e-01
+0.0000000000000000e+00 +0.0000000000000000e+00 +0.0000000000000000e+00
+1.9634954084936229e-01 +1.9509032201612847e-01 +1.9509032201612847e-01
+3.9269908169872414e-01 +3.8268343236508978e-01 +3.8268343236508978e-01
+5.8904862254808599e-01 +5.5557023301960207e-01 +5.5557023301960207e-01
+7.8539816339744828e-01 +7.0710678118654746e-01 +7.0710678118654746e-01
+9.8174770424681057e-01 +8.3146961230254535e-01 +8.3146961230254535e-01
+1.1780972450961720e+00 +9.2387953251128652e-01 +9.2387953251128652e-01
+1.3744467859455343e+00 +9.8078528040323043e-01 +9.8078528040323043e-01
+1.5707963267948966e+00 +1.0000000000000000e+00 +1.0000000000000000e+00
+1.7671458676442588e+00 +9.8078528040323043e-01 +9.8078528040323043e-01
+1.9634954084936211e+00 +9.2387953251128663e-01 +9.2387953251128663e-01
+2.1598449493429825e+00 +8.3146961230254546e-01 +8.3146961230254546e-01
+2.3561944901923448e+00 +7.0710678118654757e-01 +7.0710678118654757e-01
+2.5525440310417071e+00 +5.5557023301960218e-01 +5.5557023301960218e-01
+2.7488935718910685e+00 +3.8268343236509028e-01 +3.8268343236509028e-01
+2.9452431127404308e+00 +1.9509032201612861e-01 +1.9509032201612861e-01
+3.1415926535897931e+00 +1.2246063538223773e-16 +1.2246467991473532e-16
fsin tsc 104 sin tsc 1393
成果物
以上。