1
0
Fork 0

Remove custom mutex implementation

As part of the investigation of the hang caused by an incorrect implementation
of condition_variable in libwinpthread, it was realized that our custom Mutex
implementation is no longer needed. Prior to lazySMP this custom implementation
resulted in a 30% speedup, but now no speed difference can be measured as no
mutex is used on the hot path in lazySMP.

https://github.com/official-stockfish/Stockfish/issues/2291
https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533733393  https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533737515

The interest of this patch is that it removes platform-specific code, which is
always less tested.

No functional change.
pull/2330/head
Joost VandeVondele 2019-09-16 07:51:25 +02:00 committed by Stéphane Nicolet
parent 8726beba59
commit d703d2b5e7
5 changed files with 16 additions and 61 deletions

View File

@ -168,7 +168,7 @@ void dbg_print() {
std::ostream& operator<<(std::ostream& os, SyncCout sc) {
static Mutex m;
static std::mutex m;
if (sc == IO_LOCK)
m.lock();

View File

@ -27,12 +27,12 @@
#include <list>
#include <sstream>
#include <type_traits>
#include <mutex>
#include "../bitboard.h"
#include "../movegen.h"
#include "../position.h"
#include "../search.h"
#include "../thread_win32_osx.h"
#include "../types.h"
#include "../uci.h"
@ -45,7 +45,9 @@
#include <sys/stat.h>
#else
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#ifndef NOMINMAX
# define NOMINMAX // Disable macros min() and max()
#endif
#include <windows.h>
#endif
@ -1124,14 +1126,14 @@ void set(T& e, uint8_t* data) {
template<TBType Type>
void* mapped(TBTable<Type>& e, const Position& pos) {
static Mutex mutex;
static std::mutex mutex;
// Use 'acquire' to avoid a thread reading 'ready' == true while
// another is still working. (compiler reordering may cause this).
if (e.ready.load(std::memory_order_acquire))
return e.baseAddress; // Could be nullptr if file does not exist
std::unique_lock<Mutex> lk(mutex);
std::unique_lock<std::mutex> lk(mutex);
if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock
return e.baseAddress;

View File

@ -81,7 +81,7 @@ void Thread::clear() {
void Thread::start_searching() {
std::lock_guard<Mutex> lk(mutex);
std::lock_guard<std::mutex> lk(mutex);
searching = true;
cv.notify_one(); // Wake up the thread in idle_loop()
}
@ -92,7 +92,7 @@ void Thread::start_searching() {
void Thread::wait_for_search_finished() {
std::unique_lock<Mutex> lk(mutex);
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&]{ return !searching; });
}
@ -112,7 +112,7 @@ void Thread::idle_loop() {
while (true)
{
std::unique_lock<Mutex> lk(mutex);
std::unique_lock<std::mutex> lk(mutex);
searching = false;
cv.notify_one(); // Wake up anyone waiting for search finished
cv.wait(lk, [&]{ return searching; });

View File

@ -42,8 +42,8 @@
class Thread {
Mutex mutex;
ConditionVariable cv;
std::mutex mutex;
std::condition_variable cv;
size_t idx;
bool exit = false, searching = true; // Set before starting std::thread
NativeThread stdThread;

View File

@ -21,60 +21,13 @@
#ifndef THREAD_WIN32_OSX_H_INCLUDED
#define THREAD_WIN32_OSX_H_INCLUDED
/// STL thread library used by mingw and gcc when cross compiling for Windows
/// relies on libwinpthread. Currently libwinpthread implements mutexes directly
/// on top of Windows semaphores. Semaphores, being kernel objects, require kernel
/// mode transition in order to lock or unlock, which is very slow compared to
/// interlocked operations (about 30% slower on bench test). To work around this
/// issue, we define our wrappers to the low level Win32 calls. We use critical
/// sections to support Windows XP and older versions. Unfortunately, cond_wait()
/// is racy between unlock() and WaitForSingleObject() but they have the same
/// speed performance as the SRW locks.
#include <condition_variable>
#include <mutex>
#include <thread>
#if defined(_WIN32) && !defined(_MSC_VER)
#ifndef NOMINMAX
# define NOMINMAX // Disable macros min() and max()
#endif
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
/// Mutex and ConditionVariable struct are wrappers of the low level locking
/// machinery and are modeled after the corresponding C++11 classes.
struct Mutex {
Mutex() { InitializeCriticalSection(&cs); }
~Mutex() { DeleteCriticalSection(&cs); }
void lock() { EnterCriticalSection(&cs); }
void unlock() { LeaveCriticalSection(&cs); }
private:
CRITICAL_SECTION cs;
};
typedef std::condition_variable_any ConditionVariable;
#else // Default case: use STL classes
typedef std::mutex Mutex;
typedef std::condition_variable ConditionVariable;
#endif
/// On OSX threads other than the main thread are created with a reduced stack
/// size of 512KB by default, this is dangerously low for deep searches, so
/// adjust it to TH_STACK_SIZE. The implementation calls pthread_create() with
/// proper stack size parameter.
/// On toolchains where pthread is always available, ensure minimum stack size
/// instead of relying on linker defaults which may be platform-specific.
/// size of 512KB by default, this is too low for deep searches, which require
/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
/// The implementation calls pthread_create() with the stack size parameter
/// equal to the linux 8MB default, on platforms that support it.
#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)