#include "Vec128.h"

// Packed single-precision compares of a and b (AArch64 Advanced SIMD).
//
// Loads 16 bytes from a and from b, treats each as four f32 lanes, and
// stores eight 16-byte lane masks at consecutive 16-byte offsets from x.
// A lane is all ones when the predicate holds and all zeros otherwise.
//
// Results in store order (x[i] assumes sizeof(Vec128) == 16 -- TODO confirm
// against Vec128.h):
//   x[0]  a == b      x[4]  a <  b
//   x[1]  a != b      x[5]  a <= b
//   x[2]  a >  b      x[6]  a <  0.0
//   x[3]  a >= b      x[7]  b >  0.0
//
// The pointers are handed to the asm as named operands instead of relying
// on the arguments still living in x0/x1/x2, so the code stays correct when
// the function is inlined or the compiler moves values between registers.
void PackedCompareF32_(Vec128 x[8], const Vec128& a, const Vec128& b)
{
    Vec128* dst = x;            // write cursor; post-incremented by every st1

    __asm volatile(
        "ld1   {v0.4s}, [%[a]]            \n\t"   // v0 = a
        "ld1   {v1.4s}, [%[b]]            \n\t"   // v1 = b

        "fcmeq v2.4s, v0.4s, v1.4s        \n\t"   // packed a == b
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "not   v2.16b, v2.16b             \n\t"   // packed a != b (inverts the == mask)
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmgt v2.4s, v0.4s, v1.4s        \n\t"   // packed a > b
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmge v2.4s, v0.4s, v1.4s        \n\t"   // packed a >= b
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmlt v2.4s, v0.4s, v1.4s        \n\t"   // packed a < b
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmle v2.4s, v0.4s, v1.4s        \n\t"   // packed a <= b
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmlt v2.4s, v0.4s, #0.0         \n\t"   // packed a < 0
        "st1   {v2.4s}, [%[dst]], #16     \n\t"

        "fcmgt v2.4s, v1.4s, #0.0         \n\t"   // packed b > 0
        "st1   {v2.4s}, [%[dst]], #16     \n\t"
        // dst is read-write because st1 post-increments it. Both source
        // pointers are consumed before the first store, so no early-clobber
        // marker is needed.
        : [dst] "+r" (dst)
        : [a] "r" (&a), [b] "r" (&b)
        // "memory": the asm reads *a / *b and writes x[0..7] behind the
        // compiler's back.
        : "v0", "v1", "v2", "memory"
    );
}

// Packed double-precision compares of a and b (AArch64 Advanced SIMD).
//
// Same contract and result order as PackedCompareF32_, but each 16-byte
// vector is treated as two f64 lanes:
//   x[0]  a == b      x[4]  a <  b
//   x[1]  a != b      x[5]  a <= b
//   x[2]  a >  b      x[6]  a <  0.0
//   x[3]  a >= b      x[7]  b >  0.0
void PackedCompareF64_(Vec128 x[8], const Vec128& a, const Vec128& b)
{
    Vec128* dst = x;            // write cursor; post-incremented by every st1

    __asm volatile(
        "ld1   {v0.2d}, [%[a]]            \n\t"   // v0 = a
        "ld1   {v1.2d}, [%[b]]            \n\t"   // v1 = b

        "fcmeq v2.2d, v0.2d, v1.2d        \n\t"   // packed a == b
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "not   v2.16b, v2.16b             \n\t"   // packed a != b (inverts the == mask)
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmgt v2.2d, v0.2d, v1.2d        \n\t"   // packed a > b
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmge v2.2d, v0.2d, v1.2d        \n\t"   // packed a >= b
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmlt v2.2d, v0.2d, v1.2d        \n\t"   // packed a < b
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmle v2.2d, v0.2d, v1.2d        \n\t"   // packed a <= b
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmlt v2.2d, v0.2d, #0.0         \n\t"   // packed a < 0
        "st1   {v2.2d}, [%[dst]], #16     \n\t"

        "fcmgt v2.2d, v1.2d, #0.0         \n\t"   // packed b > 0
        "st1   {v2.2d}, [%[dst]], #16     \n\t"
        // Operand and clobber rationale is the same as in the F32 variant:
        // dst is updated by the post-indexed stores, and memory is both
        // read and written by the asm.
        : [dst] "+r" (dst)
        : [a] "r" (&a), [b] "r" (&b)
        : "v0", "v1", "v2", "memory"
    );
}