1
0
Fork 0

Refactor and simplify the CpuInfo class to a stripped down version that only works out the flags it needs to know

pull/3602/head
JackWright347 2021-07-29 21:48:45 +02:00
parent 93a93b3d17
commit 947bbcc09b
4 changed files with 108 additions and 194 deletions

View File

@ -79,7 +79,7 @@ endif
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
# vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 256
# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
#
@ -326,7 +326,7 @@ endif
ifeq ($(COMP),gcc)
comp=gcc
CXX=g++
CXXFLAGS += -pedantic -Wextra -Wshadow -mxsave
CXXFLAGS += -pedantic -Wextra -Wshadow
ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android)
@ -368,7 +368,7 @@ ifeq ($(COMP),mingw)
CXX=g++
endif
CXXFLAGS += -Wextra -Wshadow -mxsave
CXXFLAGS += -Wextra -Wshadow
LDFLAGS += -static
endif

View File

@ -33,10 +33,6 @@ const CpuInfo::CpuId Stockfish::CpuInfo::CPUID;
__cpuidex(out, eax, ecx);
}
uint64_t CpuInfo::xgetbv(unsigned int x) {
return _xgetbv(x);
}
# elif defined(__GNUC__) || defined(__clang__)
#include <cpuid.h>
@ -45,12 +41,6 @@ const CpuInfo::CpuId Stockfish::CpuInfo::CPUID;
__cpuid_count(eax, ecx, out[0], out[1], out[2], out[3]);
}
uint64_t CpuInfo::xgetbv(unsigned int index) {
uint32_t eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((uint64_t)edx << 32) | eax;
}
#else
# message "No CPU-ID intrinsic defined for compiler."
#endif
@ -58,82 +48,67 @@ const CpuInfo::CpuId Stockfish::CpuInfo::CPUID;
# message "No CPU-ID intrinsic defined for processor architecture (currently only x86-32/64 is supported)."
#endif
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif
bool CpuInfo::OS_AVX() {
bool CpuInfo::osAVX() {
if (OSXSAVE() && AVX())
{
const uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// check for OS-support of YMM state. Necessary for AVX and AVX2.
return (xcrFeatureMask & 0x06) == 0x06;
// Check OS has enabled both XMM and YMM state support. Necessary for AVX and AVX2.
return (xcrFeatureMask() & 0x06) == 0x06;
}
return false;
}
bool CpuInfo::OS_AVX2() {
if (OS_AVX())
bool CpuInfo::osAVX2() {
if (osAVX())
{
return AVX2();
}
return false;
}
bool CpuInfo::OS_AVX512() {
if (OS_AVX() && AVX512F() && AVX512DQ() && AVX512CD() && AVX512BW() && AVX512VL())
bool CpuInfo::osAVX512() {
if (osAVX() && AVX512F() && AVX512BW())
{
const uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
// Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
return (xcrFeatureMask & 0xE6) == 0xE6;
return (xcrFeatureMask() & 0xE6) == 0xE6;
}
return false;
}
std::string CpuInfo::get_info_string() {
std::string CpuInfo::infoString() {
std::string s;
s += "\nVendor: ";
s += Vendor();
s += vendor();
s += ", Family: ";
s += std::to_string(CPUID._family);
s += ", Model: ";
s += std::to_string(CPUID._model);
s += ", Stepping: ";
s += std::to_string(CPUID._stepping);
s += "\n";
s += "Brand: ";
s += Brand();
s += brand();
s += "\n";
s += "Hardware Features: ";
if (X64()) s += "64bit ";
if (MMX()) s += "MMX ";
if (ABM()) s += "ABM ";
if (RDRAND()) s += "RDRAND ";
if (RDSEED()) s += "RDSEED ";
if (BMI1()) s += "BMI1 ";
if (BMI2()) s += "BMI2 ";
if (ADX()) s += "ADX ";
if (MPX()) s += "MPX ";
if (PREFETCHWT1()) s += "PREFETCHWT1 ";
if (RDPID()) s += "RDPID ";
if (GFNI()) s += "GFNI ";
if (VAES()) s += "VAES ";
if (SSE()) s += "SSE ";
if (SSE2()) s += "SSE2 ";
if (SSE3()) s += "SSE3 ";
if (SSSE3()) s += "SSSE3 ";
if (SSE4a()) s += "SSE4a ";
if (SSE41()) s += "SSE4.1 ";
if (SSE42()) s += "SSE4.2 ";
if (AES()) s += "AES-NI ";
if (SHA()) s += "SHA ";
if (POPCNT()) s += "POPCNT ";
if (AVX()) s += "AVX ";
if (AVX2()) s += "AVX2 ";
if (BMI2()) s += "BMI2 ";
s += "\n";
s += "OS + Hardware Features: ";
(OS_AVX()) ? s += "AVX = yes, " : s += "AVX = no, ";
(OS_AVX2()) ? s += "AVX2 = yes, " : s += "AVX2 = no, ";
(OS_AVX512()) ? s += "AVX-512 = yes" : s += "AVX-512 = no";
s += "OS Supported Features: ";
(osAVX()) ? s += "AVX = yes, " : s += "AVX = no, ";
(osAVX2()) ? s += "AVX2 = yes, " : s += "AVX2 = no, ";
(osAVX512()) ? s += "AVX-512 = yes" : s += "AVX-512 = no";
s += "\n";
return s;

View File

@ -33,89 +33,39 @@ namespace Stockfish {
class CpuId;
public:
static std::string Vendor() { return CPUID._vendor; }
static std::string Brand() { return CPUID._brand; }
static std::string get_info_string();
static std::string vendor() { return CPUID._vendor; }
static std::string brand() { return CPUID._brand; }
static std::string infoString();
static bool isIntel() { return CPUID._isIntel; }
static bool isAMD() { return CPUID._isAMD; }
static bool isAMDZen3() { return CPUID._isAMD && CPUID._family > 24; }
static bool OS_AVX();
static bool OS_AVX2();
static bool OS_AVX512();
static bool isIntel() { return CPUID._isIntel; }
static bool isAMD() { return CPUID._isAMD; }
// True when the CPU was identified as AMD and its computed family value exceeds 24.
// NOTE(review): presumably aimed at family 19h (25, Zen 3); any later AMD family also
// satisfies this test, so the name is narrower than the check — confirm the intent.
static bool isAMDZen3() { return CPUID._isAMD && CPUID._family > 24; }
static bool osAVX();
static bool osAVX2();
static bool osAVX512();
// flags reported by function 0x01
static bool SSE3() { return CPUID._f1_ECX[0]; } // -msse3
static bool SSSE3() { return CPUID._f1_ECX[9]; } // -DUSE_SSSE3 -mssse3
static bool SSE41() { return CPUID._f1_ECX[19]; } // -DUSE_SSE41 -msse4.1
static bool POPCNT() { return CPUID._f1_ECX[23]; } // -DUSE_POPCNT -mpopcnt
static bool OSXSAVE() { return CPUID._f1_ECX[27]; } // OS uses XSAVE/XRSTOR
static bool AVX() { return CPUID._f1_ECX[28]; } // AVX supported by CPU
static bool MMX() { return CPUID._f1_EDX[23]; } // -DUSE_MMX -mmmx
static bool SSE() { return CPUID._f1_EDX[25]; } // -msse
static bool SSE2() { return CPUID._f1_EDX[26]; } // -DUSE_SSE2 -msse2
// flags reported by function 0x07
static bool AVX2() { return CPUID._f7_EBX[5]; } // -mavx2
static bool BMI2() { return CPUID._f7_EBX[8]; } // -DUSE_PEXT -mbmi2
static bool AVX512F() { return CPUID._f7_EBX[16]; } // -mavx512f
static bool AVX512DQ() { return CPUID._f7_EBX[17]; } // -mavx512dq
static bool AVX512BW() { return CPUID._f7_EBX[30]; } // -mavx512bw
static bool AVX512VL() { return CPUID._f7_EBX[31]; } // -mavx512vl
static bool AVX512VNNI() { return CPUID._f7_ECX[11]; } // -mavx512vnni
// flags reported by function 0x0D
// flags reported by function 0x00000001
static bool SSE3() { return CPUID._f1_ECX[0]; }
static bool PCLMULQDQ() { return CPUID._f1_ECX[1]; }
static bool MONITOR() { return CPUID._f1_ECX[3]; }
static bool SSSE3() { return CPUID._f1_ECX[9]; }
static bool FMA3() { return CPUID._f1_ECX[12]; }
static bool CMPXCHG16B() { return CPUID._f1_ECX[13]; }
static bool SSE41() { return CPUID._f1_ECX[19]; }
static bool SSE42() { return CPUID._f1_ECX[20]; }
static bool MOVBE() { return CPUID._f1_ECX[22]; }
static bool POPCNT() { return CPUID._f1_ECX[23]; }
static bool AES() { return CPUID._f1_ECX[25]; }
static bool XSAVE() { return CPUID._f1_ECX[26]; }
static bool OSXSAVE() { return CPUID._f1_ECX[27]; }
static bool AVX() { return CPUID._f1_ECX[28]; }
static bool F16C() { return CPUID._f1_ECX[29]; }
static bool RDRAND() { return CPUID._f1_ECX[30]; }
static bool MSR() { return CPUID._f1_EDX[5]; }
static bool CX8() { return CPUID._f1_EDX[8]; }
static bool SEP() { return CPUID._f1_EDX[11]; }
static bool CMOV() { return CPUID._f1_EDX[15]; }
static bool CLFSH() { return CPUID._f1_EDX[19]; }
static bool MMX() { return CPUID._f1_EDX[23]; }
static bool FXSR() { return CPUID._f1_EDX[24]; }
static bool SSE() { return CPUID._f1_EDX[25]; }
static bool SSE2() { return CPUID._f1_EDX[26]; }
// flags reported by function 0x00000007
static bool FSGSBASE() { return CPUID._f7_EBX[0]; }
static bool BMI1() { return CPUID._f7_EBX[3]; }
static bool HLE() { return CPUID._isIntel && CPUID._f7_EBX[4]; }
static bool AVX2() { return CPUID._f7_EBX[5]; }
static bool BMI2() { return CPUID._f7_EBX[8]; }
static bool ERMS() { return CPUID._f7_EBX[9]; }
static bool INVPCID() { return CPUID._f7_EBX[10]; }
static bool RTM() { return CPUID._isIntel && CPUID._f7_EBX[11]; }
static bool MPX() { return CPUID._f7_EBX[14]; }
static bool AVX512F() { return CPUID._f7_EBX[16]; }
static bool AVX512DQ() { return CPUID._f7_EBX[17]; }
static bool RDSEED() { return CPUID._f7_EBX[18]; }
static bool ADX() { return CPUID._f7_EBX[19]; }
static bool AVX512IFMA() { return CPUID._f7_EBX[21]; }
static bool AVX512PF() { return CPUID._f7_EBX[26]; }
static bool AVX512ER() { return CPUID._f7_EBX[27]; }
static bool AVX512CD() { return CPUID._f7_EBX[28]; }
static bool SHA() { return CPUID._f7_EBX[29]; }
static bool AVX512BW() { return CPUID._f7_EBX[30]; }
static bool AVX512VL() { return CPUID._f7_EBX[31]; }
static bool PREFETCHWT1() { return CPUID._f7_ECX[0]; }
static bool AVX512VBMI() { return CPUID._f7_ECX[1]; }
static bool AVX512VBMI2() { return CPUID._f7_ECX[6]; }
static bool GFNI() { return CPUID._f7_ECX[8]; }
static bool VAES() { return CPUID._f7_ECX[9]; }
static bool AVX512VPCLMUL() { return CPUID._f7_ECX[10]; }
static bool AVX512VNNI() { return CPUID._f7_ECX[11]; }
static bool AVX512BITALG() { return CPUID._f7_ECX[12]; }
static bool AVX512VPOPCNTDQ() { return CPUID._f7_ECX[14]; }
static bool RDPID() { return CPUID._f7_ECX[22]; }
static bool AVX5124FMAPS() { return CPUID._f7_EDX[2]; }
static bool AVX5124VNNIW() { return CPUID._f7_EDX[3]; }
// flags reported by extended function 0x80000001
static bool LAHF() { return CPUID._f81_ECX[0]; }
static bool ABM() { return CPUID._f81_ECX[5]; }
static bool SSE4a() { return CPUID._f81_ECX[6]; }
static bool XOP() { return CPUID._f81_ECX[11]; }
static bool FMA4() { return CPUID._f81_ECX[16]; }
static bool TBM() { return CPUID._f81_ECX[21]; }
static bool SYSCALL() { return CPUID._isIntel && CPUID._f81_EDX[11]; }
static bool MMXEXT() { return CPUID._isAMD && CPUID._f81_EDX[22]; }
static bool RDTSCP() { return CPUID._isIntel && CPUID._f81_EDX[27]; }
static bool X64() { return CPUID._isIntel && CPUID._f81_EDX[29]; }
static bool _3DNOWEXT() { return CPUID._isAMD && CPUID._f81_EDX[30]; }
static bool _3DNOW() { return CPUID._isAMD && CPUID._f81_EDX[31]; }
static bool X64() { return CPUID._f81_EDX[29]; } // -DIS_64BIT
private:
static const CpuId CPUID;
@ -129,23 +79,19 @@ namespace Stockfish {
_isIntel{ false },
_isAMD{ false },
_f1_EAX{ 0 },
_f1_EBX{ 0 },
_f1_ECX{ 0 },
_f1_EDX{ 0 },
_f7_EAX{ 0 },
_f7_EBX{ 0 },
_f7_ECX{ 0 },
_f7_EDX{ 0 },
_f81_EAX{ 0 },
_f81_EBX{ 0 },
_f81_ECX{ 0 },
_fD_EAX{ 0 },
_fD_EDX{ 0 },
_f81_EDX{ 0 },
_data{},
_dataExt{},
_family{ 0 },
_model{ 0 },
_ext_family{ 0 },
_ext_model{ 0 }
_stepping{ 0 }
{
std::array<int32_t, 4> info;
@ -176,24 +122,29 @@ namespace Stockfish {
_isAMD = true;
}
// load bitset with flags for function 0x00000001
if (_idMax >= 1)
// load bitsets with flags for function 0x01
if (_idMax >= 0x01)
{
_f1_EAX = _data[1][0];
_f1_EBX = _data[1][1];
_f1_ECX = _data[1][2];
_f1_EDX = _data[1][3];
}
// load bitset with flags for function 0x00000007
if (_idMax >= 7)
// load bitsets with flags for function 0x07
if (_idMax >= 0x07)
{
_f7_EAX = _data[7][0];
_f7_EBX = _data[7][1];
_f7_ECX = _data[7][2];
_f7_EDX = _data[7][3];
}
// load output of function 0x0D
if (_idMax >= 0x0D)
{
_fD_EAX = _data[13][0];
_fD_EDX = _data[13][3];
}
// calling cpuid with 0x80000000
// gets the number of the highest valid extended function ID
cpuid(info.data(), 0x80000000, 0);
@ -208,9 +159,6 @@ namespace Stockfish {
// load bitset with flags for extended function 0x80000001
if (_idExtMax >= 0x80000001)
{
_f81_EAX = _dataExt[1][0];
_f81_EBX = _dataExt[1][1];
_f81_ECX = _dataExt[1][2];
_f81_EDX = _dataExt[1][3];
}
@ -225,69 +173,60 @@ namespace Stockfish {
}
// compute X86 Family and Model
const int32_t signature = _data[1][0];
_family = (signature >> 8) & 0x0F;
_model = (signature >> 4) & 0x0F;
_ext_family = 0;
_ext_model = 0;
// The "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A"
// specifies the Extended Model is defined only when the Base Family is
// 06h or 0Fh.
// The "AMD CPUID Specification" specifies that the Extended Model is
// defined only when Base Family is 0Fh.
// Both manuals define the display model as
// {ExtendedModel[3:0],BaseModel[3:0]} in that case.
_family = (_f1_EAX >> 8) & 0x0F;
_model = (_f1_EAX >> 4) & 0x0F;
_stepping = _f1_EAX & 0x0F;
int32_t ext_family = 0;
int32_t ext_model = 0;
// The "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" specifies the Extended Model
// is defined only when the Base Family is 06h or 0Fh.
// The "AMD CPUID Specification" specifies that the Extended Model is defined only when Base Family is 0Fh.
// Both manuals define the display model as {ExtendedModel[3:0],BaseModel[3:0]} in that case.
if (_family == 0x0F || (_family == 0x06 && _isIntel))
{
_ext_model = (signature >> 16) & 0x0F;
_model += _ext_model << 4;
ext_model = (_f1_EAX >> 16) & 0x0F;
_model += ext_model << 4;
}
// Both the "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A"
// and the "AMD CPUID Specification" specify that the Extended Family is
// defined only when the Base Family is 0Fh.
// Both manuals define the display family as {0000b,BaseFamily[3:0]} +
// ExtendedFamily[7:0] in that case.
// Both the "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A" and the "AMD CPUID Specification"
// specify that the Extended Family is defined only when the Base Family is 0Fh.
// Both manuals define the display family as {0000b,BaseFamily[3:0]} + ExtendedFamily[7:0] in that case.
if (_family == 0x0F)
{
_ext_family = (signature >> 20) & 0xFF;
_family += _ext_family;
ext_family = (_f1_EAX >> 20) & 0xFF;
_family += ext_family;
}
};
uint32_t _idMax;
uint32_t _idExtMax;
bool _isIntel;
bool _isAMD;
std::bitset<32> _f1_EAX;
std::bitset<32> _f1_EBX;
std::bitset<32> _f1_ECX;
std::bitset<32> _f1_EDX;
std::bitset<32> _f7_EAX;
std::bitset<32> _f7_EBX;
std::bitset<32> _f7_ECX;
std::bitset<32> _f7_EDX;
std::bitset<32> _f81_EAX;
std::bitset<32> _f81_EBX;
std::bitset<32> _f81_ECX;
std::bitset<32> _f81_EDX;
std::vector<std::array<int32_t, 4>> _data;
std::vector<std::array<int32_t, 4>> _dataExt;
std::string _vendor;
std::string _brand;
bool _isIntel;
bool _isAMD;
int32_t _family;
int32_t _model;
int32_t _ext_family;
int32_t _ext_model;
int32_t _stepping;
int32_t _f1_EAX;
std::bitset<32> _f1_ECX;
std::bitset<32> _f1_EDX;
std::bitset<32> _f7_EBX;
std::bitset<32> _f7_ECX;
std::bitset<32> _f7_EDX;
int32_t _fD_EAX;
int32_t _fD_EDX;
std::bitset<32> _f81_EDX;
private:
uint32_t _idMax;
uint32_t _idExtMax;
std::vector<std::array<int32_t, 4>> _data;
std::vector<std::array<int32_t, 4>> _dataExt;
};
private:
static void cpuid(int32_t out[4], int32_t eax, int32_t ecx);
static uint64_t xgetbv(unsigned int x);
// Builds the 64-bit XCR0 XFEATURE_ENABLED_MASK value from the EDX:EAX pair cached
// from CPUID function 0x0D (EDX supplies the upper 32 bits, EAX the lower 32 bits).
// Both halves are stored as int32_t, so each is converted to uint32_t before being
// widened; without that, a set bit 31 in EAX would sign-extend and turn on every
// bit of the upper half of the result.
// NOTE(review): per the Intel SDM, CPUID leaf 0x0D sub-leaf 0 reports the XCR0 bits
// the processor supports, which is not necessarily what the OS has enabled (that is
// what XGETBV returns) — confirm this is an acceptable source for the OS checks.
static inline uint64_t xcrFeatureMask() {
    return (static_cast<uint64_t>(static_cast<uint32_t>(CPUID._fD_EDX)) << 32)
         | static_cast<uint32_t>(CPUID._fD_EAX);
}
};
} // namespace Stockfish

View File

@ -278,7 +278,7 @@ void UCI::loop(int argc, char* argv[]) {
else if (token == "d") sync_cout << pos << sync_endl;
else if (token == "eval") trace_eval(pos);
else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
else if (token == "cpu") sync_cout << CpuInfo::get_info_string() << sync_endl;
else if (token == "cpu") sync_cout << CpuInfo::infoString() << sync_endl;
else if (token == "export_net")
{
std::optional<std::string> filename;