1
0
Fork 0

Refactor and simplify the CpuInfo class to a stripped-down version that only works out the flags it needs to know

pull/3602/head
JackWright347 2021-07-30 15:56:09 +02:00
parent a8dec5e9fc
commit 18f1cd7d15
2 changed files with 73 additions and 69 deletions

View File

@ -29,13 +29,17 @@ const CpuInfo::CpuId Stockfish::CpuInfo::CPUID;
#include <windows.h>
#include <intrin.h>
void CpuInfo::cpuid(int32_t out[4], int32_t eax, int32_t ecx) { __cpuidex(out, eax, ecx); }
void CpuInfo::cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
__cpuidex(out, eax, ecx);
}
# elif defined(__GNUC__) || defined(__clang__)
#include <cpuid.h>
void CpuInfo::cpuid(int32_t out[4], int32_t eax, int32_t ecx) { __cpuid_count(eax, ecx, out[0], out[1], out[2], out[3]); }
void CpuInfo::cpuid(int32_t out[4], int32_t eax, int32_t ecx) {
__cpuid_count(eax, ecx, out[0], out[1], out[2], out[3]);
}
#else
# message "No CPU-ID intrinsic defined for compiler."
@ -47,7 +51,7 @@ const CpuInfo::CpuId Stockfish::CpuInfo::CPUID;
bool CpuInfo::osAVX() {
if (OSXSAVE() && AVX())
{
// Check OS has enabled both XMM and YMM state support. Necessary for AVX and AVX2.
// check OS has enabled XMM and YMM state support (necessary for AVX and AVX2)
return (xcrFeatureMask() & 0x06) == 0x06;
}
return false;
@ -64,7 +68,7 @@ bool CpuInfo::osAVX2() {
bool CpuInfo::osAVX512() {
if (osAVX() && AVX512F() && AVX512BW())
{
// Check for OS-support of ZMM and YMM state. Necessary for AVX-512.
// check OS has enabled XMM, YMM and ZMM state support (necessary for AVX-512)
return (xcrFeatureMask() & 0xE6) == 0xE6;
}
return false;
@ -73,7 +77,7 @@ bool CpuInfo::osAVX512() {
std::string CpuInfo::infoString() {
std::string s;
s += "\nVendor: ";
s += "\nVendor : ";
s += vendor();
s += ", Family: ";
s += std::to_string(CPUID._family);
@ -82,30 +86,33 @@ std::string CpuInfo::infoString() {
s += ", Stepping: ";
s += std::to_string(CPUID._stepping);
s += "\n";
s += "Brand: ";
s += "Brand : ";
s += brand();
s += "\n";
s += "Hardware Features: ";
if (X64()) s += "64bit ";
if (MMX()) s += "MMX ";
if (SSE()) s += "SSE ";
if (SSE2()) s += "SSE2 ";
if (SSE3()) s += "SSE3 ";
if (SSSE3()) s += "SSSE3 ";
if (SSE41()) s += "SSE4.1 ";
if (POPCNT()) s += "POPCNT ";
if (AVX()) s += "AVX ";
if (AVX2()) s += "AVX2 ";
if (BMI2()) s += "BMI2 ";
s += "\n";
s += "OS Supported Features: ";
(osAVX()) ? s += "AVX = yes, " : s += "AVX = no, ";
(osAVX2()) ? s += "AVX2 = yes, " : s += "AVX2 = no, ";
(osAVX512()) ? s += "AVX-512 = yes" : s += "AVX-512 = no";
s += "\n";
s += "\nCPU : ";
bool fs = true; // full set of features supported?
if (X64()) { s += "64bit "; } else { s += "[64bit] "; fs = false; }
if (MMX()) { s += "MMX "; } else { s += "[MMX] "; fs = false; }
if (SSE()) { s += "SSE "; } else { s += "[SSE] "; fs = false; }
if (SSE2()) { s += "SSE2 "; } else { s += "[SSE2] "; fs = false; }
if (SSE3()) { s += "SSE3 "; } else { s += "[SSE3] "; fs = false; }
if (SSSE3()) { s += "SSSE3 "; } else { s += "[SSSE3] "; fs = false; }
if (SSE41()) { s += "SSE4.1 "; } else { s += "[SSE4.1] "; fs = false; }
if (POPCNT()) { s += "POPCNT "; } else { s += "[POPCNT] "; fs = false; }
if (AVX()) { s += "AVX "; } else { s += "[AVX] "; fs = false; }
if (AVX2()) { s += "AVX2 "; } else { s += "[AVX2] "; fs = false; }
if (BMI2()) { isAMDBeforeZen3() ? s += "BMI2(slow PEXT)" : s += "BMI2"; } else { s += "[BMI2]"; fs = false; }
s += "\n ";
if (AVX512F()) { s += "AVX-512F "; } else { s += "[AVX-512F] "; fs = false; }
if (AVX512DQ()) { s += "AVX-512DQ "; } else { s += "[AVX-512DQ] "; fs = false; }
if (AVX512BW()) { s += "AVX-512BW "; } else { s += "[AVX-512BW] "; fs = false; }
if (AVX512VL()) { s += "AVX-512VL "; } else { s += "[AVX-512VL] "; fs = false; }
if (AVX512VNNI()) { s += "AVX-512VNNI"; } else { s += "[AVX-512VNNI]"; fs = false; }
s += "\nOS : ";
if (osAVX()) { s += "AVX "; } else { s += "[AVX] "; fs = false; }
if (osAVX2()) { s += "AVX2 "; } else { s += "[AVX2] "; fs = false; }
if (osAVX512()) { s += "AVX-512"; } else { s += "[AVX-512]"; fs = false; }
fs ? s += "\nAll features are supported.\n" :
s += "\nValues in brackets mean that this feature is not supported by the CPU or the OS.\n";
return s;
}

View File

@ -37,9 +37,9 @@ namespace Stockfish {
static std::string brand() { return CPUID._brand; }
static std::string infoString();
static bool isIntel() { return CPUID._isIntel; }
static bool isAMD() { return CPUID._isAMD; }
static bool isAMDZen3() { return CPUID._isAMD && CPUID._family > 24; }
static bool isIntel() { return CPUID._isIntel; }
static bool isAMD() { return CPUID._isAMD; }
static bool isAMDBeforeZen3() { return CPUID._isAMD && CPUID._family < 25; }
static bool osAVX();
static bool osAVX2();
static bool osAVX512();
@ -87,30 +87,32 @@ namespace Stockfish {
_f7_ECX{ 0 },
_f7_EDX{ 0 },
_fD_xcrFeatureMask{ 0 },
_f81_EDX{ 0 },
_idMax{ 0 },
_idExtMax{ 0 },
_data{},
_dataExt{}
_f81_EDX{ 0 }
{
std::array<int32_t, 4> info;
uint32_t idMax{ 0 };
uint32_t idExtMax{ 0 };
std::vector<std::array<int32_t, 4>> data;
std::vector<std::array<int32_t, 4>> dataExt;
// calling cpuid with 0x0
// gets the number of the highest valid function ID
cpuid(info.data(), 0, 0);
_idMax = info[0];
idMax = info[0];
// Optimization: 0x0D is the highest function we need to know results of
if (idMax > 0x0D) { idMax = 0x0D; }
// call each function and store results in data
for (uint32_t i = 0; i <= _idMax; ++i)
for (uint32_t i = 0; i <= idMax; ++i)
{
cpuid(info.data(), i, 0);
_data.push_back(info);
data.push_back(info);
}
// retrieve CPU vendor string
char vendor[3*sizeof(int32_t) + 1] { 0 };
memcpy(vendor, &_data[0][1], sizeof(int32_t));
memcpy(vendor + 4, &_data[0][3], sizeof(int32_t));
memcpy(vendor + 8, &_data[0][2], sizeof(int32_t));
memcpy(vendor, &data[0][1], sizeof(int32_t));
memcpy(vendor + 4, &data[0][3], sizeof(int32_t));
memcpy(vendor + 8, &data[0][2], sizeof(int32_t));
_vendor = vendor;
if (_vendor == "GenuineIntel")
@ -123,51 +125,53 @@ namespace Stockfish {
}
// load bitsets with flags for function 0x01
if (_idMax >= 0x01)
if (idMax >= 0x01)
{
_f1_EAX = _data[1][0];
_f1_ECX = _data[1][2];
_f1_EDX = _data[1][3];
_f1_EAX = data[1][0];
_f1_ECX = data[1][2];
_f1_EDX = data[1][3];
}
// load bitsets with flags for function 0x07
if (_idMax >= 0x07)
if (idMax >= 0x07)
{
_f7_EBX = _data[7][1];
_f7_ECX = _data[7][2];
_f7_EDX = _data[7][3];
_f7_EBX = data[7][1];
_f7_ECX = data[7][2];
_f7_EDX = data[7][3];
}
// load output of function 0x0D
if (_idMax >= 0x0D)
if (idMax >= 0x0D)
{
_fD_xcrFeatureMask = ((uint64_t)_data[13][3] << 32) | _data[13][0];
_fD_xcrFeatureMask = ((uint64_t)data[13][3] << 32) | data[13][0];
}
// calling cpuid with 0x80000000
// gets the number of the highest valid extended function ID
cpuid(info.data(), 0x80000000, 0);
_idExtMax = info[0];
idExtMax = info[0];
// Optimization: 0x80000004 is the highest extended function we need to know results of
if (idExtMax > 0x80000004) { idExtMax = 0x80000004; }
// call each extended function and store results in dataExt
for (uint32_t i = 0x80000000; i <= _idExtMax; ++i)
for (uint32_t i = 0x80000000; i <= idExtMax; ++i)
{
cpuid(info.data(), i, 0);
_dataExt.push_back(info);
dataExt.push_back(info);
}
// load bitset with flags for extended function 0x80000001
if (_idExtMax >= 0x80000001)
if (idExtMax >= 0x80000001)
{
_f81_EDX = _dataExt[1][3];
_f81_EDX = dataExt[1][3];
}
// retrieve CPU brand string if reported
if (_idExtMax >= 0x80000004)
if (idExtMax >= 0x80000004)
{
char brand[3*sizeof(info) + 1] { 0 };
memcpy(brand, _dataExt[2].data(), sizeof(info));
memcpy(brand + 16, _dataExt[3].data(), sizeof(info));
memcpy(brand + 32, _dataExt[4].data(), sizeof(info));
memcpy(brand, dataExt[2].data(), sizeof(info));
memcpy(brand + 16, dataExt[3].data(), sizeof(info));
memcpy(brand + 32, dataExt[4].data(), sizeof(info));
_brand = brand;
}
@ -214,14 +218,7 @@ namespace Stockfish {
std::bitset<32> _f7_EDX;
uint64_t _fD_xcrFeatureMask;
std::bitset<32> _f81_EDX;
private:
uint32_t _idMax;
uint32_t _idExtMax;
std::vector<std::array<int32_t, 4>> _data;
std::vector<std::array<int32_t, 4>> _dataExt;
}; // class CpuId
};
};
} // namespace Stockfish