// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdio.h>
#include <cfloat> // FLT_MAX
#include <cmath> // std::abs
#include "hwy/base.h"
#include "hwy/nanobenchmark.h"
// clang-format off
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "hwy/contrib/math/math_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/contrib/math/math-inl.h"
#include "hwy/tests/test_util-inl.h"
// clang-format on
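// foreach_target.h re-includes the file named by HWY_TARGET_INCLUDE once per
// enabled target, redefining HWY_TARGET and HWY_NAMESPACE for each pass;
// HWY_ONCE is true only on the final pass, where tests are registered below.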
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
namespace {
// We have had test failures caused by excess precision due to keeping
// intermediate results in 80-bit x87 registers. One such failure mode is that
// Log1p computes a 1.0 which is not exactly equal to 1.0f, causing is_pole to
// incorrectly evaluate to false.
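// Illustrative sketch of the failure mode (hypothetical helper name):
//   float one = Log1pIntermediate();  // may stay in an 80-bit register
//   bool is_pole = (one == 1.0f);     // compares the 80-bit value
// is_pole can be false even though `one` rounds to exactly 1.0f.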
#undef HWY_MATH_TEST_EXCESS_PRECISION
#if HWY_ARCH_X86_32 && HWY_COMPILER_GCC_ACTUAL && \
(HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_EMU128)
// GCC 13+: because CMAKE_CXX_EXTENSIONS is OFF, we build with -std= and hence
// also -fexcess-precision=standard, so there is no problem. See #1708.
#if HWY_COMPILER_GCC_ACTUAL >= 1300
#define HWY_MATH_TEST_EXCESS_PRECISION 0
#else // HWY_COMPILER_GCC_ACTUAL < 1300
// The build system must enable SSE2, e.g. via HWY_CMAKE_SSE2.
#if defined(__SSE2__) // correct flag given, no problem
#define HWY_MATH_TEST_EXCESS_PRECISION 0
#else
#define HWY_MATH_TEST_EXCESS_PRECISION 1
#pragma message( \
"Skipping scalar math_test on 32-bit x86 GCC <13 without HWY_CMAKE_SSE2")
#endif // defined(__SSE2__)
#endif // HWY_COMPILER_GCC_ACTUAL
#else // not (x86-32, GCC, scalar target): running math_test normally
#define HWY_MATH_TEST_EXCESS_PRECISION 0
#endif // HWY_ARCH_X86_32 etc
template <class T, class D>
HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T),
Vec<D> (*fxN)(D, VecArg<Vec<D>>), D d, T min, T max,
uint64_t max_error_ulp) {
if (HWY_MATH_TEST_EXCESS_PRECISION) {
static bool once = true;
if (once) {
once = false;
HWY_WARN("Skipping math_test due to GCC issue with excess precision.\n");
}
return;
}
using UintT = MakeUnsigned<T>;
const UintT min_bits = BitCastScalar<UintT>(min);
const UintT max_bits = BitCastScalar<UintT>(max);
// If min is negative and max is positive, the range needs to be broken into
// two pieces, [+0, max] and [-0, min], otherwise [min, max].
int range_count = 1;
UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}};
if ((min < 0.0) && (max > 0.0)) {
ranges[0][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(+0.0));
ranges[0][1] = max_bits;
ranges[1][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(-0.0));
ranges[1][1] = min_bits;
range_count = 2;
}
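// Rationale: the unsigned bit patterns of non-negative IEEE-754 floats
// increase monotonically with the value, whereas negative floats start at
// the sign bit (0x80... for -0) and increase with magnitude. With negative
// min, the single range [min_bits, max_bits] would be empty because
// min_bits exceeds max_bits.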
uint64_t max_ulp = 0;
// Emulation is slower, so we cannot afford as many samples.
constexpr UintT kSamplesPerRange = static_cast<UintT>(AdjustedReps(4000));
for (int range_index = 0; range_index < range_count; ++range_index) {
const UintT start = ranges[range_index][0];
const UintT stop = ranges[range_index][1];
const UintT step = HWY_MAX(1, ((stop - start) / kSamplesPerRange));
for (UintT value_bits = start; value_bits <= stop; value_bits += step) {
// For reasons unknown, the HWY_MAX is necessary on RVV, otherwise
// value_bits can be less than start, and thus possibly NaN.
const T value =
BitCastScalar<T>(HWY_MIN(HWY_MAX(start, value_bits), stop));
const T actual = GetLane(fxN(d, Set(d, value)));
const T expected = fx1(value);
// Skip small inputs and outputs on Armv7, which flushes subnormals to zero.
#if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) {
continue;
}
#endif
const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected);
max_ulp = HWY_MAX(max_ulp, ulp);
if (ulp > max_error_ulp) {
fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n",
hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected,
actual, static_cast<double>(ulp),
static_cast<uint32_t>(max_error_ulp));
}
}
}
fprintf(stderr, "%s: %s max_ulp %g\n", hwy::TypeName(T(), Lanes(d)).c_str(),
name, static_cast<double>(max_ulp));
HWY_ASSERT(max_ulp <= max_error_ulp);
}
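// For reference: ComputeUlpDelta counts the representable values between
// actual and expected. A minimal sketch for same-sign finite floats (the
// actual hwy::detail implementation also handles zeros, signs and NaN):
//   uint64_t UlpDeltaSketch(float a, float b) {  // hypothetical helper
//     const uint32_t ua = BitCastScalar<uint32_t>(a);
//     const uint32_t ub = BitCastScalar<uint32_t>(b);
//     return (ua > ub) ? (ua - ub) : (ub - ua);
//   }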
#define DEFINE_MATH_TEST_FUNC(NAME) \
HWY_NOINLINE void TestAll##NAME() { \
ForFloat3264Types(ForPartialVectors<Test##NAME>()); \
}
#undef DEFINE_MATH_TEST
#define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \
F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR) \
struct Test##NAME { \
template <class T, class D> \
HWY_NOINLINE void operator()(T, D d) { \
if (sizeof(T) == 4) { \
TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX, \
F32_ERROR); \
} else { \
TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d, \
static_cast<T>(F64_MIN), static_cast<T>(F64_MAX), \
F64_ERROR); \
} \
} \
}; \
DEFINE_MATH_TEST_FUNC(NAME)
// Floating point values closest to but less than 1.0. Avoid variables with
// static initializers inside HWY_BEFORE_NAMESPACE/HWY_AFTER_NAMESPACE to
// ensure target-specific code does not leak into startup code.
float kNearOneF() { return BitCastScalar<float>(0x3F7FFFFF); }
double kNearOneD() { return BitCastScalar<double>(0x3FEFFFFFFFFFFFFFULL); }
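// Sanity check: 1.0f is 0x3F800000 and 1.0 is 0x3FF0000000000000, so the
// bit patterns above are exactly one ULP below 1, i.e. nextafter(1, 0).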
// The discrepancy is unacceptably large on MSYS2 (less accurate libm?), so
// increase the error tolerance only there.
constexpr uint64_t Cos64ULP() {
#if defined(__MINGW32__)
return 23;
#else
return 3;
#endif
}
constexpr uint64_t ACosh32ULP() {
#if defined(__MINGW32__)
return 8;
#else
return 3;
#endif
}
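// CallSinCos computes sine and cosine in a single pass; these wrappers
// return one of the two outputs so each can be validated via TestMath.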
template <class D>
static Vec<D> SinCosSin(const D d, VecArg<Vec<D>> x) {
Vec<D> s, c;
CallSinCos(d, x, s, c);
return s;
}
template <class D>
static Vec<D> SinCosCos(const D d, VecArg<Vec<D>> x) {
Vec<D> s, c;
CallSinCos(d, x, s, c);
return c;
}
// On targets without FMA, the result is less accurate, so allow more ULPs.
constexpr uint64_t SinCosSin32ULP() {
#if !(HWY_NATIVE_FMA)
return 256;
#else
return 3;
#endif
}
constexpr uint64_t SinCosCos32ULP() {
#if !(HWY_NATIVE_FMA)
return 64;
#else
return 3;
#endif
}
// clang-format off
DEFINE_MATH_TEST(Acos,
std::acos, CallAcos, -1.0f, +1.0f, 3, // NEON is 3 instead of 2
std::acos, CallAcos, -1.0, +1.0, 2)
DEFINE_MATH_TEST(Acosh,
std::acosh, CallAcosh, +1.0f, +FLT_MAX, ACosh32ULP(),
std::acosh, CallAcosh, +1.0, +DBL_MAX, 3)
DEFINE_MATH_TEST(Asin,
std::asin, CallAsin, -1.0f, +1.0f, 4, // 4 ulp on Armv7, not 2
std::asin, CallAsin, -1.0, +1.0, 2)
DEFINE_MATH_TEST(Asinh,
std::asinh, CallAsinh, -FLT_MAX, +FLT_MAX, 3,
std::asinh, CallAsinh, -DBL_MAX, +DBL_MAX, 3)
DEFINE_MATH_TEST(Atan,
std::atan, CallAtan, -FLT_MAX, +FLT_MAX, 3,
std::atan, CallAtan, -DBL_MAX, +DBL_MAX, 3)
// NEON has ULP 4 instead of 3
DEFINE_MATH_TEST(Atanh,
std::atanh, CallAtanh, -kNearOneF(), +kNearOneF(), 4,
std::atanh, CallAtanh, -kNearOneD(), +kNearOneD(), 3)
DEFINE_MATH_TEST(Cos,
std::cos, CallCos, -39000.0f, +39000.0f, 3,
std::cos, CallCos, -39000.0, +39000.0, Cos64ULP())
DEFINE_MATH_TEST(Exp,
std::exp, CallExp, -FLT_MAX, +104.0f, 1,
std::exp, CallExp, -DBL_MAX, +104.0, 1)
DEFINE_MATH_TEST(Exp2,
std::exp2, CallExp2, -FLT_MAX, +128.0f, 2,
std::exp2, CallExp2, -DBL_MAX, +128.0, 2)
DEFINE_MATH_TEST(Expm1,
std::expm1, CallExpm1, -FLT_MAX, +104.0f, 4,
std::expm1, CallExpm1, -DBL_MAX, +104.0, 4)
DEFINE_MATH_TEST(Log,
std::log, CallLog, +FLT_MIN, +FLT_MAX, 1,
std::log, CallLog, +DBL_MIN, +DBL_MAX, 1)
DEFINE_MATH_TEST(Log10,
std::log10, CallLog10, +FLT_MIN, +FLT_MAX, 2,
std::log10, CallLog10, +DBL_MIN, +DBL_MAX, 2)
DEFINE_MATH_TEST(Log1p,
std::log1p, CallLog1p, +0.0f, +1e37f, 3, // NEON is 3 instead of 2
std::log1p, CallLog1p, +0.0, +DBL_MAX, 2)
DEFINE_MATH_TEST(Log2,
std::log2, CallLog2, +FLT_MIN, +FLT_MAX, 2,
std::log2, CallLog2, +DBL_MIN, +DBL_MAX, 2)
DEFINE_MATH_TEST(Sin,
std::sin, CallSin, -39000.0f, +39000.0f, 3,
std::sin, CallSin, -39000.0, +39000.0, 4) // MSYS is 4 instead of 3
DEFINE_MATH_TEST(Sinh,
std::sinh, CallSinh, -80.0f, +80.0f, 4,
std::sinh, CallSinh, -709.0, +709.0, 4)
DEFINE_MATH_TEST(Tanh,
std::tanh, CallTanh, -FLT_MAX, +FLT_MAX, 4,
std::tanh, CallTanh, -DBL_MAX, +DBL_MAX, 4)
DEFINE_MATH_TEST(SinCosSin,
std::sin, SinCosSin, -39000.0f, +39000.0f, SinCosSin32ULP(),
std::sin, SinCosSin, -39000.0, +39000.0, 1)
DEFINE_MATH_TEST(SinCosCos,
std::cos, SinCosCos, -39000.0f, +39000.0f, SinCosCos32ULP(),
std::cos, SinCosCos, -39000.0, +39000.0, 1)
// clang-format on
template <typename T, class D>
void Atan2TestCases(T /*unused*/, D d, size_t& padded,
AlignedFreeUniquePtr<T[]>& out_y,
AlignedFreeUniquePtr<T[]>& out_x,
AlignedFreeUniquePtr<T[]>& out_expected) {
struct YX {
T y;
T x;
T expected;
};
const T pos = ConvertScalarTo<T>(1E5);
const T neg = ConvertScalarTo<T>(-1E7);
const T p0 = ConvertScalarTo<T>(0);
// An integer literal -0 converts to +0.0; the -0.0 literal is required to
// get an actual negative zero.
const T n0 = ConvertScalarTo<T>(-0.0);
const T p1 = ConvertScalarTo<T>(1);
const T n1 = ConvertScalarTo<T>(-1);
const T p2 = ConvertScalarTo<T>(2);
const T n2 = ConvertScalarTo<T>(-2);
const T inf = GetLane(Inf(d));
const T nan = GetLane(NaN(d));
const T pi = ConvertScalarTo<T>(3.141592653589793238);
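// Expected values follow the C99/IEEE-754 atan2 special cases, e.g.
// atan2(±0, x < 0) = ±pi and atan2(±inf, +inf) = ±pi/4.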
const YX test_cases[] = { // 45 degree steps:
{p0, p1, p0}, // E
{n1, p1, -pi / 4}, // SE
{n1, p0, -pi / 2}, // S
{n1, n1, -3 * pi / 4}, // SW
{p0, n1, pi}, // W
{p1, n1, 3 * pi / 4}, // NW
{p1, p0, pi / 2}, // N
{p1, p1, pi / 4}, // NE
// y = ±0, x < 0 or -0
{p0, n1, pi},
{n0, n2, -pi},
// y = ±0, x > 0 or +0
{p0, p2, p0},
{n0, p2, n0},
// y = ±∞, x finite
{inf, p2, pi / 2},
{-inf, p2, -pi / 2},
// y = ±∞, x = -∞
{inf, -inf, 3 * pi / 4},
{-inf, -inf, -3 * pi / 4},
// y = ±∞, x = +∞
{inf, inf, pi / 4},
{-inf, inf, -pi / 4},
// y < 0, x = ±0
{n2, p0, -pi / 2},
{n1, n0, -pi / 2},
// y > 0, x = ±0
{pos, p0, pi / 2},
{p2, n0, pi / 2},
// finite y > 0, x = -∞
{pos, -inf, pi},
// finite y < 0, x = -∞
{neg, -inf, -pi},
// finite y > 0, x = +∞
{pos, inf, p0},
// finite y < 0, x = +∞
{neg, inf, n0},
// y NaN xor x NaN
{nan, p0, nan},
{pos, nan, nan}};
const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
const size_t N = Lanes(d);
padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors
out_y = AllocateAligned<T>(padded);
out_x = AllocateAligned<T>(padded);
out_expected = AllocateAligned<T>(padded);
HWY_ASSERT(out_y && out_x && out_expected);
size_t i = 0;
for (; i < kNumTestCases; ++i) {
out_y[i] = test_cases[i].y;
out_x[i] = test_cases[i].x;
out_expected[i] = test_cases[i].expected;
}
for (; i < padded; ++i) {
out_y[i] = p0;
out_x[i] = p0;
out_expected[i] = p0;
}
}
struct TestAtan2 {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
const size_t N = Lanes(d);
size_t padded;
AlignedFreeUniquePtr<T[]> in_y, in_x, expected;
Atan2TestCases(t, d, padded, in_y, in_x, expected);
const Vec<D> tolerance = Set(d, ConvertScalarTo<T>(1E-5));
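// The expected values are derived from a rounded pi constant, so compare
// with a small absolute tolerance rather than an ULP bound.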
for (size_t i = 0; i < padded; ++i) {
const T actual = ConvertScalarTo<T>(std::atan2(in_y[i], in_x[i]));
// fprintf(stderr, "%zu: table %f atan2 %f\n", i, expected[i], actual);
HWY_ASSERT_EQ(expected[i], actual);
}
for (size_t i = 0; i < padded; i += N) {
const Vec<D> y = Load(d, &in_y[i]);
const Vec<D> x = Load(d, &in_x[i]);
#if HWY_ARCH_ARM_A64
// TODO(b/287462770): inline to work around incorrect SVE codegen
const Vec<D> actual = Atan2(d, y, x);
#else
const Vec<D> actual = CallAtan2(d, y, x);
#endif
const Vec<D> vexpected = Load(d, &expected[i]);
const Mask<D> exp_nan = IsNaN(vexpected);
const Mask<D> act_nan = IsNaN(actual);
HWY_ASSERT_MASK_EQ(d, exp_nan, act_nan);
// If not NaN, then compare with tolerance
const Mask<D> ge = Ge(actual, Sub(vexpected, tolerance));
const Mask<D> le = Le(actual, Add(vexpected, tolerance));
const Mask<D> ok = Or(act_nan, And(le, ge));
if (!AllTrue(d, ok)) {
const size_t mismatch =
static_cast<size_t>(FindKnownFirstTrue(d, Not(ok)));
fprintf(stderr, "Mismatch for i=%d expected %E actual %E\n",
static_cast<int>(i + mismatch), expected[i + mismatch],
ExtractLane(actual, mismatch));
HWY_ASSERT(0);
}
}
}
};
HWY_NOINLINE void TestAllAtan2() {
if (HWY_MATH_TEST_EXCESS_PRECISION) return;
ForFloat3264Types(ForPartialVectors<TestAtan2>());
}
template <typename T, class D>
void HypotTestCases(T /*unused*/, D d, size_t& padded,
AlignedFreeUniquePtr<T[]>& out_a,
AlignedFreeUniquePtr<T[]>& out_b,
AlignedFreeUniquePtr<T[]>& out_expected) {
using TU = MakeUnsigned<T>;
struct AB {
T a;
T b;
};
constexpr int kNumOfMantBits = MantissaBits<T>();
static_assert(kNumOfMantBits > 0, "kNumOfMantBits > 0 must be true");
// Ensures inputs are not constexpr.
const TU u1 = static_cast<TU>(hwy::Unpredictable1());
const double k1 = static_cast<double>(u1);
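// Multiplying by a runtime 1 that is opaque to the optimizer prevents
// constant folding, so the ops below are exercised with runtime inputs.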
const T pos = ConvertScalarTo<T>(1E5 * k1);
const T neg = ConvertScalarTo<T>(-1E7 * k1);
const T p0 = ConvertScalarTo<T>(k1 - 1.0);
// A -0.0 literal is not enough to get an actual negative zero here; copy
// the sign bit instead.
const T n0 = ScalarCopySign<T>(p0, neg);
const T p1 = ConvertScalarTo<T>(k1);
const T n1 = ConvertScalarTo<T>(-k1);
const T p2 = ConvertScalarTo<T>(2 * k1);
const T n2 = ConvertScalarTo<T>(-2 * k1);
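// An all-ones exponent with a zero mantissa encodes infinity; additionally
// setting the most-significant mantissa bit yields a quiet NaN.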
const T inf = BitCastScalar<T>(ExponentMask<T>() * u1);
const T neg_inf = ScalarCopySign(inf, n1);
const T nan = BitCastScalar<T>(
static_cast<TU>(ExponentMask<T>() | (u1 << (kNumOfMantBits - 1))));
const double max_as_f64 = ConvertScalarTo<double>(HighestValue<T>()) * k1;
const T max = ConvertScalarTo<T>(max_as_f64);
const T huge = ConvertScalarTo<T>(max_as_f64 * 0.25);
const T neg_huge = ScalarCopySign(huge, n1);
const T huge2 = ConvertScalarTo<T>(max_as_f64 * 0.039415044328304796);
const T large = ConvertScalarTo<T>(3.512227595593985E18 * k1);
const T neg_large = ScalarCopySign(large, n1);
const T large2 = ConvertScalarTo<T>(2.1190576943127544E16 * k1);
const T small = ConvertScalarTo<T>(1.067033284841808E-11 * k1);
const T neg_small = ScalarCopySign(small, n1);
const T small2 = ConvertScalarTo<T>(1.9401409532292856E-12 * k1);
const T tiny = BitCastScalar<T>(static_cast<TU>(u1 << kNumOfMantBits));
const T neg_tiny = ScalarCopySign(tiny, n1);
const T tiny2 =
ConvertScalarTo<T>(78.68466968859765 * ConvertScalarTo<double>(tiny));
const AB test_cases[] = {{p0, p0}, {p0, n0},
{n0, n0}, {p1, p1},
{p1, n1}, {n1, n1},
{p2, p2}, {p2, n2},
{p2, pos}, {p2, neg},
{n2, pos}, {n2, neg},
{n2, n2}, {p0, tiny},
{p0, neg_tiny}, {n0, tiny},
{n0, neg_tiny}, {p1, tiny},
{p1, neg_tiny}, {n1, tiny},
{n1, neg_tiny}, {tiny, p0},
{tiny2, p0}, {tiny, tiny2},
{neg_tiny, tiny2}, {huge, huge2},
{neg_huge, huge2}, {huge, p0},
{huge, tiny}, {huge2, tiny2},
{large, p0}, {large, large2},
{neg_large, p0}, {neg_large, large2},
{small, p0}, {small, small2},
{neg_small, p0}, {neg_small, small2},
{max, p0}, {max, huge},
{max, max}, {p0, inf},
{n0, inf}, {p1, inf},
{n1, inf}, {p2, inf},
{n2, inf}, {p0, neg_inf},
{n0, neg_inf}, {p1, neg_inf},
{n1, neg_inf}, {p2, neg_inf},
{n2, neg_inf}, {p0, nan},
{n0, nan}, {p1, nan},
{n1, nan}, {p2, nan},
{n2, nan}, {huge, inf},
{inf, nan}, {neg_inf, nan},
{nan, nan}};
const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
const size_t N = Lanes(d);
padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors
out_a = AllocateAligned<T>(padded);
out_b = AllocateAligned<T>(padded);
out_expected = AllocateAligned<T>(padded);
HWY_ASSERT(out_a && out_b && out_expected);
size_t i = 0;
for (; i < kNumTestCases; ++i) {
const T a =
test_cases[i].a * hwy::ConvertScalarTo<T>(hwy::Unpredictable1());
const T b = test_cases[i].b;
#if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
// Ignore test cases that have infinite or NaN inputs on Armv7 NEON
if (!ScalarIsFinite(a) || !ScalarIsFinite(b)) {
out_a[i] = p0;
out_b[i] = p0;
out_expected[i] = p0;
continue;
}
#endif
out_a[i] = a;
out_b[i] = b;
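// Per C99/IEEE-754, hypot returns +inf if either input is infinite, even
// if the other is NaN; otherwise any NaN input yields NaN.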
if (ScalarIsInf(a) || ScalarIsInf(b)) {
out_expected[i] = inf;
} else if (ScalarIsNaN(a) || ScalarIsNaN(b)) {
out_expected[i] = nan;
} else {
out_expected[i] = std::hypot(a, b);
}
}
for (; i < padded; ++i) {
out_a[i] = p0;
out_b[i] = p0;
out_expected[i] = p0;
}
}
struct TestHypot {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
if (HWY_MATH_TEST_EXCESS_PRECISION) {
return;
}
const size_t N = Lanes(d);
constexpr uint64_t kMaxErrorUlp = 4;
size_t padded;
AlignedFreeUniquePtr<T[]> in_a, in_b, expected;
HypotTestCases(t, d, padded, in_a, in_b, expected);
auto actual1_lanes = AllocateAligned<T>(N);
auto actual2_lanes = AllocateAligned<T>(N);
HWY_ASSERT(actual1_lanes && actual2_lanes);
uint64_t max_ulp = 0;
for (size_t i = 0; i < padded; i += N) {
const auto a = Load(d, in_a.get() + i);
const auto b = Load(d, in_b.get() + i);
#if HWY_ARCH_ARM_A64
// TODO(b/287462770): inline to work around incorrect SVE codegen
const auto actual1 = Hypot(d, a, b);
const auto actual2 = Hypot(d, b, a);
#else
const auto actual1 = CallHypot(d, a, b);
const auto actual2 = CallHypot(d, b, a);
#endif
Store(actual1, d, actual1_lanes.get());
Store(actual2, d, actual2_lanes.get());
for (size_t j = 0; j < N; j++) {
const T val_a = in_a[i + j];
const T val_b = in_b[i + j];
const T expected_val = expected[i + j];
const T actual1_val = actual1_lanes[j];
const T actual2_val = actual2_lanes[j];
const auto ulp1 =
hwy::detail::ComputeUlpDelta(actual1_val, expected_val);
if (ulp1 > kMaxErrorUlp) {
fprintf(stderr,
"%s: Hypot(%e, %e) lane %d expected %E actual %E ulp %g max "
"ulp %u\n",
hwy::TypeName(T(), Lanes(d)).c_str(), val_a, val_b,
static_cast<int>(j), expected_val, actual1_val,
static_cast<double>(ulp1),
static_cast<uint32_t>(kMaxErrorUlp));
}
const auto ulp2 =
hwy::detail::ComputeUlpDelta(actual2_val, expected_val);
if (ulp2 > kMaxErrorUlp) {
fprintf(stderr,
"%s: Hypot(%e, %e) expected %E actual %E ulp %g max ulp %u\n",
hwy::TypeName(T(), Lanes(d)).c_str(), val_b, val_a,
expected_val, actual2_val, static_cast<double>(ulp2),
static_cast<uint32_t>(kMaxErrorUlp));
}
max_ulp = HWY_MAX(max_ulp, HWY_MAX(ulp1, ulp2));
}
}
if (max_ulp != 0) {
fprintf(stderr, "%s: Hypot max_ulp %g\n",
hwy::TypeName(T(), Lanes(d)).c_str(),
static_cast<double>(max_ulp));
HWY_ASSERT(max_ulp <= kMaxErrorUlp);
}
}
};
HWY_NOINLINE void TestAllHypot() {
if (HWY_MATH_TEST_EXCESS_PRECISION) return;
ForFloat3264Types(ForPartialVectors<TestHypot>());
}
} // namespace
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace hwy {
namespace {
HWY_BEFORE_TEST(HwyMathTest);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcos);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAcosh);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsin);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAsinh);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtan);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtanh);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllCos);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp2);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExpm1);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog10);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog1p);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog2);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSin);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinh);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllTanh);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllAtan2);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosSin);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllSinCosCos);
HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllHypot);
HWY_AFTER_TEST();
} // namespace
} // namespace hwy
HWY_TEST_MAIN();
#endif // HWY_ONCE