ch15_02/main.cpp |
#include <iostream> #include <iomanip> #include <cmath> #include "Vec128.h" using namespace std; extern void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b); extern void PackedCompareF64_(Vec128 x[8], const Vec128& a, const Vec128& b); const char* c_CmpStr[8] = { "EQ", "NE", "GT", "GE", "LT", "LE", "a LT0", "b GT0" }; void PackedCompareF32(void) { const char nl = '\n'; Vec128 x[8], a, b; a.m_F32[0] = 2.0; b.m_F32[0] = -4.0; a.m_F32[1] = 17.0; b.m_F32[1] = 12.0; a.m_F32[2] = -6.0; b.m_F32[2] = -6.0; a.m_F32[3] = 3.0; b.m_F32[3] = 8.0; PackedCompareF32_(x, a, b); cout << "\nResults for PackedCompareF32_\n"; cout << setw(11) << 'a' << ':' << a.ToStringF32() << nl; cout << setw(11) << 'b' << ':' << b.ToStringF32() << nl; cout << nl; for (int j = 0; j < 8; j++) cout << setw(11) << c_CmpStr[j] << ':' << x[j].ToStringX32() << nl; } void PackedCompareF64(void) { const char nl = '\n'; Vec128 x[8], a, b; a.m_F64[0] = -2.0; b.m_F64[0] = -4.0; a.m_F64[1] = M_SQRT2; b.m_F64[1] = M_PI; PackedCompareF64_(x, a, b); cout << "\nResults for PackedCompareF64_\n"; cout << setw(11) << 'a' << ':' << a.ToStringF64() << nl; cout << setw(11) << 'b' << ':' << b.ToStringF64() << nl; cout << nl; for (int j = 0; j < 8; j++) cout << setw(11) << c_CmpStr[j] << ':' << x[j].ToStringX64() << nl; } int main() { PackedCompareF32(); PackedCompareF64(); return 0; } |
ch15_02/neon.cpp |
#include "Vec128.h"

// Packed single-precision compares using AArch64 Advanced SIMD.
//
// Loads 16 bytes from `a` and from `b`, treats each as four 32-bit
// floating-point lanes, and stores eight consecutive 16-byte result
// vectors starting at x[0], in this order:
//   x[0] a == b    x[1] a != b (bitwise NOT of x[0])
//   x[2] a >  b    x[3] a >= b
//   x[4] a <  b    x[5] a <= b
//   x[6] a <  0    x[7] b >  0
//
// The pointers are passed to the assembly as named operands instead of
// being assumed to live in x0/x1/x2, so the code stays correct if the
// compiler inlines the function or moves the arguments to other
// registers. The destination is a read-write operand because every
// store post-increments it, and the "memory" clobber tells the compiler
// that the asm reads *a / *b and writes x[].
void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b)
{
    Vec128* dst = x;    // advanced by 16 bytes after each store

    __asm volatile(
        "ld1   {v0.4s}, [%[a]]          \n\t"   // v0 = a
        "ld1   {v1.4s}, [%[b]]          \n\t"   // v1 = b

        "fcmeq v2.4s, v0.4s, v1.4s      \n\t"   // packed a == b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"   // [dst] = v2; dst += 16

        "not   v2.16b, v2.16b           \n\t"   // packed a != b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmgt v2.4s, v0.4s, v1.4s      \n\t"   // packed a > b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmge v2.4s, v0.4s, v1.4s      \n\t"   // packed a >= b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmlt v2.4s, v0.4s, v1.4s      \n\t"   // packed a < b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmle v2.4s, v0.4s, v1.4s      \n\t"   // packed a <= b
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmlt v2.4s, v0.4s, 0.0        \n\t"   // packed a < 0
        "st1   {v2.4s}, [%[dst]], 16    \n\t"

        "fcmgt v2.4s, v1.4s, 0.0        \n\t"   // packed b > 0
        "st1   {v2.4s}, [%[dst]], 16    \n\t"
        : [dst] "+r" (dst)
        : [a] "r" (&a), [b] "r" (&b)
        : "v0", "v1", "v2", "memory"
    );
}

// Packed double-precision compares using AArch64 Advanced SIMD.
//
// Same sequence and result order as PackedCompareF32_, but the 16 bytes
// of `a` and `b` are treated as two 64-bit floating-point lanes:
//   x[0] a == b    x[1] a != b (bitwise NOT of x[0])
//   x[2] a >  b    x[3] a >= b
//   x[4] a <  b    x[5] a <= b
//   x[6] a <  0    x[7] b >  0
//
// Operands and clobbers are declared for the same reasons as in the
// single-precision routine: no reliance on x0/x1/x2 holding the
// arguments, the advancing destination is marked read-write, and the
// memory traffic is made visible to the compiler.
void PackedCompareF64_(Vec128 x[8], const Vec128& a, const Vec128& b)
{
    Vec128* dst = x;    // advanced by 16 bytes after each store

    __asm volatile(
        "ld1   {v0.2d}, [%[a]]          \n\t"   // v0 = a
        "ld1   {v1.2d}, [%[b]]          \n\t"   // v1 = b

        "fcmeq v2.2d, v0.2d, v1.2d      \n\t"   // packed a == b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"   // [dst] = v2; dst += 16

        "not   v2.16b, v2.16b           \n\t"   // packed a != b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmgt v2.2d, v0.2d, v1.2d      \n\t"   // packed a > b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmge v2.2d, v0.2d, v1.2d      \n\t"   // packed a >= b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmlt v2.2d, v0.2d, v1.2d      \n\t"   // packed a < b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmle v2.2d, v0.2d, v1.2d      \n\t"   // packed a <= b
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmlt v2.2d, v0.2d, 0.0        \n\t"   // packed a < 0
        "st1   {v2.2d}, [%[dst]], 16    \n\t"

        "fcmgt v2.2d, v1.2d, 0.0        \n\t"   // packed b > 0
        "st1   {v2.2d}, [%[dst]], 16    \n\t"
        : [dst] "+r" (dst)
        : [a] "r" (&a), [b] "r" (&b)
        : "v0", "v1", "v2", "memory"
    );
}
ch15_02/main.cpp の実行例 |
arm64@manet Ch15_02 % g++ -I.. -std=c++11 -O main.cpp neon.cpp -o a.out arm64@manet Ch15_02 % ./a.out Results for PackedCompareF32_ a: 2.000000 17.000000 | -6.000000 3.000000 b: -4.000000 12.000000 | -6.000000 8.000000 EQ: 00000000 00000000 | FFFFFFFF 00000000 NE: FFFFFFFF FFFFFFFF | 00000000 FFFFFFFF GT: FFFFFFFF FFFFFFFF | 00000000 00000000 GE: FFFFFFFF FFFFFFFF | FFFFFFFF 00000000 LT: 00000000 00000000 | 00000000 FFFFFFFF LE: 00000000 00000000 | FFFFFFFF FFFFFFFF a LT0: 00000000 00000000 | FFFFFFFF 00000000 b GT0: 00000000 FFFFFFFF | 00000000 FFFFFFFF Results for PackedCompareF64_ a: -2.000000000000 | 1.414213562373 b: -4.000000000000 | 3.141592653590 EQ: 0000000000000000 | 0000000000000000 NE: FFFFFFFFFFFFFFFF | FFFFFFFFFFFFFFFF GT: FFFFFFFFFFFFFFFF | 0000000000000000 GE: FFFFFFFFFFFFFFFF | 0000000000000000 LT: 0000000000000000 | FFFFFFFFFFFFFFFF LE: 0000000000000000 | FFFFFFFFFFFFFFFF a LT0: FFFFFFFFFFFFFFFF | 0000000000000000 b GT0: 0000000000000000 | FFFFFFFFFFFFFFFF |