ch15_03/main.cpp |
#include <iostream> #include <iomanip> #include <cmath> #include "Vec128.h" using namespace std; extern void F32fromI32(Vec128 x[2], const Vec128& a); extern void I32fromF32(Vec128 x[2], const Vec128& a); extern void F64fromI64(Vec128 x[2], const Vec128& a); extern void I64fromF64(Vec128 x[2], const Vec128& a); extern void F32fromU32(Vec128 x[2], const Vec128& a); extern void U32fromF32(Vec128 x[2], const Vec128& a); extern void F64fromU64(Vec128 x[2], const Vec128& a); extern void U64fromF64(Vec128 x[2], const Vec128& a); extern void F32fromF64(Vec128 x[2], const Vec128& a, const Vec128& b); extern void F64fromF32(Vec128 x[2], const Vec128& a); void PackedConvertA(void) { const char nl = '\n'; Vec128 x[2], a; // F32_I32 a.m_I32[0] = 10; a.m_I32[1] = -500; a.m_I32[2] = 600; a.m_I32[3] = -1024; F32fromI32(x, a); cout << "\nResults for CvtOp::F32_I32\n"; cout << "a: " << a.ToStringI32() << nl; cout << "x[0]: " << x[0].ToStringF32() << nl; // I32_F32 a.m_F32[0] = -1.25f; a.m_F32[1] = 100.875f; a.m_F32[2] = -200.0f; a.m_F32[3] = (float)M_PI; I32fromF32(x, a); cout << "\nResults for CvtOp::I32_F32\n"; cout << "a: " << a.ToStringF32() << nl; cout << "x[0]: " << x[0].ToStringI32() << nl; // F64_I64 a.m_I64[0] = 1000; a.m_I64[1] = -500000000000; F64fromI64(x, a); cout << "\nResults for CvtOp::F64_I64\n"; cout << "a: " << a.ToStringI64() << nl; cout << "x[0]: " << x[0].ToStringF64() << nl; // I64_F64 a.m_F64[0] = -122.66666667; a.m_F64[1] = 1234567890123.75; I64fromF64(x, a); cout << "\nResults for CvtOp::I64_F64\n"; cout << "a: " << a.ToStringF64() << nl; cout << "x[0]: " << x[0].ToStringI64() << nl; } void PackedConvertB(void) { const char nl = '\n'; Vec128 x[2], a; // F32_U32 a.m_U32[0] = 10; a.m_U32[1] = 500; a.m_U32[2] = 600; a.m_U32[3] = 1024; F32fromU32(x, a); cout << "\nResults for CvtOp::F32_U32\n"; cout << "a: " << a.ToStringU32() << nl; cout << "x[0]: " << x[0].ToStringF32() << nl; // U32_F32 a.m_F32[0] = 1.25f; a.m_F32[1] = 100.875f; a.m_F32[2] = 200.0f; 
a.m_F32[3] = (float)M_PI; U32fromF32(x, a); cout << "\nResults for CvtOp::U32_F32\n"; cout << "a: " << a.ToStringF32() << nl; cout << "x[0]: " << x[0].ToStringU32() << nl; // F64_U64 a.m_I64[0] = 1000; a.m_I64[1] = 420000000000; F64fromU64(x, a); cout << "\nResults for CvtOp::F64_U64\n"; cout << "a: " << a.ToStringU64() << nl; cout << "x[0]: " << x[0].ToStringF64() << nl; // U64_F64 a.m_F64[0] = 698.40; a.m_F64[1] = 1234567890123.75; U64fromF64(x, a); cout << "\nResults for CvtOp::U64_F64\n"; cout << "a: " << a.ToStringF64() << nl; cout << "x[0]: " << x[0].ToStringU64() << nl; } void PackedConvertC(void) { const char nl = '\n'; Vec128 x[2], a, b; // F32_F64 a.m_F64[0] = M_PI; a.m_F64[1] = M_LOG10E; b.m_F64[0] = -M_E; b.m_F64[1] = M_LN2; F32fromF64(x, a, b); cout << "\nResults for CvtOp::F32_F64\n"; cout << "a: " << a.ToStringF64() << nl; cout << "b: " << b.ToStringF64() << nl; cout << "x[0]: " << x[0].ToStringF32() << nl; // F64_F32 a.m_F32[0] = 1.0f / 9.0f; a.m_F32[1] = 100.875f; a.m_F32[2] = 200.0f; a.m_F32[3] = (float)M_SQRT2; F64fromF32(x, a); cout << "\nResults for CvtOp::F64_F32\n"; cout << "a: " << a.ToStringF32() << nl; cout << "x[0]: " << x[0].ToStringF64() << nl; cout << "x[1]: " << x[1].ToStringF64() << nl; } int main() { PackedConvertA(); PackedConvertB(); PackedConvertC(); return 0; } |
ch15_03/neon.cpp |
#include "Vec128.h"

// AArch64 Advanced SIMD packed-conversion kernels.
//
// Every kernel loads its source vector(s), converts with one or two SIMD
// instructions, and stores the result through x. The pointers are passed to
// the assembler as named input operands, so the code does not depend on the
// arguments still sitting in their incoming registers, which would silently
// break if the function were inlined or the compiler moved the values.
// v0-v2 are used as scratch and listed as clobbers; "memory" tells the
// compiler that the asm reads *a / *b and writes *x.

// x[0] = float32 lanes converted from the four signed 32-bit lanes of a.
void F32fromI32(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]       \n\t"
        "scvtf  v1.4s, v0.4s            \n\t"   // float32 <- int32
        "st1    {v1.4s}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = signed 32-bit lanes converted from the four float32 lanes of a
// (FCVTNS: round to nearest, ties to even).
void I32fromF32(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]       \n\t"
        "fcvtns v1.4s, v0.4s            \n\t"   // int32 <- float32
        "st1    {v1.4s}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = float64 lanes converted from the two signed 64-bit lanes of a.
void F64fromI64(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]       \n\t"
        "scvtf  v1.2d, v0.2d            \n\t"   // float64 <- int64
        "st1    {v1.2d}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = signed 64-bit lanes converted from the two float64 lanes of a
// (FCVTNS: round to nearest, ties to even).
void I64fromF64(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]       \n\t"
        "fcvtns v1.2d, v0.2d            \n\t"   // int64 <- float64
        "st1    {v1.2d}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = float32 lanes converted from the four unsigned 32-bit lanes of a.
void F32fromU32(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]       \n\t"
        "ucvtf  v1.4s, v0.4s            \n\t"   // float32 <- uint32
        "st1    {v1.4s}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = unsigned 32-bit lanes converted from the four float32 lanes of a
// (FCVTNU: round to nearest, ties to even).
void U32fromF32(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]       \n\t"
        "fcvtnu v1.4s, v0.4s            \n\t"   // uint32 <- float32
        "st1    {v1.4s}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = float64 lanes converted from the two unsigned 64-bit lanes of a.
void F64fromU64(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]       \n\t"
        "ucvtf  v1.2d, v0.2d            \n\t"   // float64 <- uint64
        "st1    {v1.2d}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = unsigned 64-bit lanes converted from the two float64 lanes of a
// (FCVTNU: round to nearest, ties to even).
void U64fromF64(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.2d}, [%[src]]       \n\t"
        "fcvtnu v1.2d, v0.2d            \n\t"   // uint64 <- float64
        "st1    {v1.2d}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "memory"
    );
}

// x[0] = four float32 lanes: the two low-order lanes are narrowed from the
// float64 lanes of a, the two high-order lanes from the float64 lanes of b.
void F32fromF64(Vec128 x[2], const Vec128& a, const Vec128& b)
{
    __asm volatile (
        "ld1    {v0.2d}, [%[lo]]        \n\t"
        "ld1    {v2.2d}, [%[hi]]        \n\t"
        "fcvtn  v1.2s, v0.2d            \n\t"   // low-order float32 pair  <- a
        "fcvtn2 v1.4s, v2.2d            \n\t"   // high-order float32 pair <- b
        "st1    {v1.4s}, [%[dst]]       \n\t"
        :
        : [dst] "r" (x), [lo] "r" (&a), [hi] "r" (&b)
        : "v0", "v1", "v2", "memory"
    );
}

// Widens the four float32 lanes of a to float64: the two low-order lanes go
// to the first 16 bytes at x and the two high-order lanes to the next 16.
// NOTE(review): this lands in x[0] and x[1] only if sizeof(Vec128) == 16 -
// confirm against Vec128.h.
void F64fromF32(Vec128 x[2], const Vec128& a)
{
    __asm volatile (
        "ld1    {v0.4s}, [%[src]]           \n\t"
        "fcvtl  v1.2d, v0.2s                \n\t"   // float64 pair <- low-order float32 pair
        "fcvtl2 v2.2d, v0.4s                \n\t"   // float64 pair <- high-order float32 pair
        "st1    {v1.2d, v2.2d}, [%[dst]]    \n\t"   // 32-byte store: v1 then v2
        :
        : [dst] "r" (x), [src] "r" (&a)
        : "v0", "v1", "v2", "memory"
    );
}
ch15_03/main.cpp の実行例 |
arm64@manet ch15_03 % g++ -I.. -std=c++11 -O main.cpp neon.cpp -o a.out arm64@manet ch15_03 % ./a.out Results for CvtOp::F32_I32 a: 10 -500 | 600 -1024 x[0]: 10.000000 -500.000000 | 600.000000 -1024.000000 Results for CvtOp::I32_F32 a: -1.250000 100.875000 | -200.000000 3.141593 x[0]: -1 101 | -200 3 Results for CvtOp::F64_I64 a: 1000 | -500000000000 x[0]: 1000.000000000000 | -500000000000.000000000000 Results for CvtOp::I64_F64 a: -122.666666670000 | 1234567890123.750000000000 x[0]: -123 | 1234567890124 Results for CvtOp::F32_U32 a: 10 500 | 600 1024 x[0]: 10.000000 500.000000 | 600.000000 1024.000000 Results for CvtOp::U32_F32 a: 1.250000 100.875000 | 200.000000 3.141593 x[0]: 1 101 | 200 3 Results for CvtOp::F64_U64 a: 1000 | 420000000000 x[0]: 1000.000000000000 | 420000000000.000000000000 Results for CvtOp::U64_F64 a: 698.400000000000 | 1234567890123.750000000000 x[0]: 698 | 1234567890124 Results for CvtOp::F32_F64 a: 3.141592653590 | 0.434294481903 b: -2.718281828459 | 0.693147180560 x[0]: 3.141593 0.434294 | -2.718282 0.693147 Results for CvtOp::F64_F32 a: 0.111111 100.875000 | 200.000000 1.414214 x[0]: 0.111111111939 | 100.875000000000 x[1]: 200.000000000000 | 1.414213538170 |