1
0
Fork 0
Start all threads searching on the root position and
use only the shared transposition table (TT) as the synchronization scheme.

It seems this scheme scales better than YBWC for
a high number of threads.

Tested at very LTC (120+0.1) with 23 threads
ELO: 35.52 +-9.6 (95%) LOS: 100.0%
Total: 1109 W: 183 L: 70 D: 856

Tested at LTC with 23 threads
ELO: 34.41 +-9.9 (95%) LOS: 100.0%
Total: 1094 W: 184 L: 76 D: 834

Tested at LTC with 7 threads
ELO: 8.76 +-5.0 (95%) LOS: 100.0%
Total: 5000 W: 735 L: 609 D: 3656

Tested at STC with 7 threads
ELO: 16.76 +-5.4 (95%) LOS: 100.0%
Total: 5000 W: 899 L: 658 D: 3443

Bench: 8397672
lazy_smp
mbootsector 2015-10-06 08:15:17 +02:00 committed by Marco Costalba
parent 7ea5659c5f
commit 2d668a3cfc
7 changed files with 345 additions and 684 deletions

View File

@ -158,7 +158,7 @@ void benchmark(const Position& current, istream& is) {
Search::StateStackPtr st;
Threads.start_thinking(pos, limits, st);
Threads.main()->join();
nodes += Search::RootPos.nodes_searched();
nodes += Threads.nodes_searched();
}
}

View File

@ -320,10 +320,3 @@ Move MovePicker::next_move<false>() {
}
}
}
/// Split-point flavour of next_move(): the move is not taken from this picker
/// but from the MovePicker stored in the split point reached through 'ss'.
/// The shared picker is not thread safe, so the caller must hold the
/// appropriate lock while calling this function.
template<>
Move MovePicker::next_move<true>() {

  MovePicker* const shared = ss->splitPoint->movePicker;

  return shared->next_move<false>();
}

File diff suppressed because it is too large Load Diff

View File

@ -101,8 +101,6 @@ typedef std::unique_ptr<std::stack<StateInfo>> StateStackPtr;
extern volatile SignalsType Signals;
extern LimitsType Limits;
extern RootMoveVector RootMoves;
extern Position RootPos;
extern StateStackPtr SetupStates;
void init();

View File

@ -82,143 +82,10 @@ Thread::Thread() /* : splitPoints() */ { // Initialization of non POD broken in
searching = false;
maxPly = 0;
splitPointsSize = 0;
activeSplitPoint = nullptr;
activePosition = nullptr;
idx = Threads.size(); // Starts from 0
}
// Thread::cutoff_occurred() checks whether a beta cutoff has occurred in the
// current active split point, or in some ancestor of the split point.
bool Thread::cutoff_occurred() const {
for (SplitPoint* sp = activeSplitPoint; sp; sp = sp->parentSplitPoint)
if (sp->cutoff)
return true;
return false;
}
// Thread::can_join() tells whether this thread may be recruited as a slave for
// split point 'sp'. A thread that is currently searching is never available.
// An idle thread with no split points of its own can help anybody; an idle
// thread that is itself the master of some split point may only help threads
// that are already working below its most recent split point (the "helpful
// master" concept in YBWC terminology).
bool Thread::can_join(const SplitPoint* sp) const {

  if (searching)
      return false;

  // Read the volatile counter exactly once: if it dropped to zero between the
  // test and the array access we would index splitPoints[] out of bounds.
  const size_t count = splitPointsSize;

  // Not a master of anything: free to join any split point
  if (count == 0)
      return true;

  // Helpful master: join only if sp's master is one of our current slaves
  return splitPoints[count - 1].slavesMask.test(sp->master->idx);
}
// Thread::split() does the actual work of distributing the work at a node between
// several available threads. A SplitPoint object is initialized with all the
// data that must be shared with the helper threads, then available threads are
// booked as slaves. If no thread is available, the master simply searches the
// split point alone. Booked helpers are informed that they have been assigned
// work, which causes them to leave their idle loops and call search(). When all
// threads have returned from search() then split() returns.
//
// pos, ss      : position and search stack of the node being split; stored in
//                the split point by pointer.
// alpha, beta  : search window, copied into the split point.
// bestValue,
// bestMove     : in/out. Their current values seed the split point and they are
//                overwritten with the split point's results before returning.
// depth        : must be at least Threads.minimumSplitDepth (asserted).
// moveCount, movePicker, nodeType, cutNode : copied into the split point as is.
void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bestValue,
Move* bestMove, Depth depth, int moveCount,
MovePicker* movePicker, int nodeType, bool cutNode) {
assert(searching);
assert(-VALUE_INFINITE < *bestValue && *bestValue <= alpha && alpha < beta && beta <= VALUE_INFINITE);
assert(depth >= Threads.minimumSplitDepth);
assert(splitPointsSize < MAX_SPLITPOINTS_PER_THREAD);
// Pick and init the next available split point
SplitPoint& sp = splitPoints[splitPointsSize];
sp.spinlock.acquire(); // No contention here as long as splitPointsSize has not been incremented
sp.master = this;
sp.parentSplitPoint = activeSplitPoint;
// Clear the mask, then register the master itself as the first participant
sp.slavesMask = 0, sp.slavesMask.set(idx);
sp.depth = depth;
sp.bestValue = *bestValue;
sp.bestMove = *bestMove;
sp.alpha = alpha;
sp.beta = beta;
sp.nodeType = nodeType;
sp.cutNode = cutNode;
sp.movePicker = movePicker;
sp.moveCount = moveCount;
sp.pos = &pos;
// Node counter of this split point; it is added to pos's count at the end of split()
sp.nodes = 0;
sp.cutoff = false;
sp.ss = ss;
sp.allSlavesSearching = true; // Must be set under lock protection
++splitPointsSize;
activeSplitPoint = &sp;
activePosition = nullptr;
// Try to allocate available threads, up to MAX_SLAVES_PER_SPLITPOINT
// participants (the master's own bit in slavesMask counts toward the limit).
Thread* slave;
while ( sp.slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
&& (slave = Threads.available_slave(&sp)) != nullptr)
{
// can_join() is tested again, this time with the candidate's spinlock held,
// before the candidate is actually booked.
slave->spinlock.acquire();
if (slave->can_join(activeSplitPoint))
{
activeSplitPoint->slavesMask.set(slave->idx);
slave->activeSplitPoint = activeSplitPoint;
slave->searching = true;
}
slave->spinlock.release();
}
// Everything is set up. The master thread enters the idle loop, from which
// it will instantly launch a search, because its 'searching' flag is set.
// The thread will return from the idle loop when all slaves have finished
// their work at this split point.
sp.spinlock.release();
Thread::idle_loop(); // Force a call to base class idle_loop()
// In the helpful master concept, a master can help only a sub-tree of its
// split point and because everything is finished here, it's not possible
// for the master to be booked.
assert(!searching);
assert(!activePosition);
// We have returned from the idle loop, which means that all threads are
// finished. Note that decreasing splitPointsSize must be done under lock
// protection to avoid a race with Thread::can_join().
spinlock.acquire();
searching = true;
--splitPointsSize;
activeSplitPoint = sp.parentSplitPoint;
activePosition = &pos;
spinlock.release();
// Split point data cannot be changed now, so no need to lock protect.
// Fold the split point's node count into pos and hand the results back
// to the caller through the in/out pointers.
pos.set_nodes_searched(pos.nodes_searched() + sp.nodes);
*bestMove = sp.bestMove;
*bestValue = sp.bestValue;
}
// TimerThread::idle_loop() is where the timer thread waits Resolution milliseconds
// and then calls check_time(). When not searching, thread sleeps until it's woken up.
@ -260,13 +127,7 @@ void MainThread::idle_loop() {
if (!exit)
{
searching = true;
Search::think();
assert(searching);
searching = false;
}
}
}
@ -317,7 +178,6 @@ void ThreadPool::exit() {
void ThreadPool::read_uci_options() {
minimumSplitDepth = Options["Min Split Depth"] * ONE_PLY;
size_t requested = Options["Threads"];
assert(requested > 0);
@ -333,16 +193,14 @@ void ThreadPool::read_uci_options() {
}
// ThreadPool::available_slave() tries to find an idle thread which is available
// to join SplitPoint 'sp'.
// ThreadPool::nodes_searched() returns the number of nodes searched.
Thread* ThreadPool::available_slave(const SplitPoint* sp) const {
uint64_t ThreadPool::nodes_searched() {
for (Thread* th : *this)
if (th->can_join(sp))
return th;
return nullptr;
uint64_t nodes = 0;
for (Thread *th : *this)
nodes += th->pos.nodes_searched();
return nodes;
}
@ -351,13 +209,14 @@ Thread* ThreadPool::available_slave(const SplitPoint* sp) const {
void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
StateStackPtr& states) {
main()->join();
Signals.stopOnPonderhit = Signals.firstRootMove = false;
Signals.stop = Signals.failedLowAtRoot = false;
RootMoves.clear();
RootPos = pos;
main()->rootMoves.clear();
main()->pos = pos;
Limits = limits;
if (states.get()) // If we don't set a new position, preserve current state
{
@ -368,7 +227,7 @@ void ThreadPool::start_thinking(const Position& pos, const LimitsType& limits,
for (const auto& m : MoveList<LEGAL>(pos))
if ( limits.searchmoves.empty()
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
RootMoves.push_back(RootMove(m));
main()->rootMoves.push_back(RootMove(m));
main()->thinking = true;
main()->notify_one(); // Wake up main thread: 'thinking' must be already set

View File

@ -37,53 +37,6 @@
struct Thread;
// Width of the slavesMask bitsets, whose bits are indexed by Thread::idx
const size_t MAX_THREADS = 128;
// Number of entries in each Thread's splitPoints[] array
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
// Thread::split() stops booking helpers once a split point's slavesMask has
// this many bits set (the master's own bit is included in the count)
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;
/// Spinlock is a minimal busy-waiting lock built on a single atomic counter.
/// The counter holds 1 when the lock is free; any value <= 0 means it is taken.
/// acquire() and release() must be paired by the caller, there is no RAII guard.
class Spinlock {

  std::atomic_int state;

public:
  // Assign in the body instead of an initializer to workaround a bug with MSVC 2013
  Spinlock() { state = 1; }

  // Blocks (spinning) until the lock has been taken by the calling thread
  void acquire() {

      for (;;)
      {
          // The thread that moves the counter from 1 to 0 owns the lock
          if (state.fetch_sub(1, std::memory_order_acquire) == 1)
              return;

          // Lost the race: wait with cheap relaxed loads until the owner
          // stores 1 again, then retry the decrement.
          while (state.load(std::memory_order_relaxed) <= 0)
              std::this_thread::yield(); // Be nice to hyperthreading
      }
  }

  // Marks the lock as free again, whatever value failed acquirers left behind
  void release() { state.store(1, std::memory_order_release); }
};
/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time
/// by Thread::split(), which fills every field while holding 'spinlock'.
struct SplitPoint {
// Const data after split point has been setup
const Position* pos;   // Position of the node that was split
Search::Stack* ss;     // Search stack pointer passed to Thread::split()
Thread* master;        // Thread that called split() and owns this split point
Depth depth;           // The following four are copied from split()'s arguments
Value beta;
int nodeType;
bool cutNode;
// Const pointers to shared data
MovePicker* movePicker;        // Shared picker used by MovePicker::next_move<true>()
SplitPoint* parentSplitPoint;  // Master's active split point at splitting time (may be
                               // null); Thread::cutoff_occurred() follows this link
// Shared variable data
Spinlock spinlock;                   // Held by split() during setup
std::bitset<MAX_THREADS> slavesMask; // One bit per Thread::idx taking part, master included
volatile bool allSlavesSearching;    // Set to true by split() under lock protection
volatile uint64_t nodes;             // Starts at 0; added to pos's node count when split() ends
volatile Value alpha;                // Seeded from split()'s alpha argument
volatile Value bestValue;            // Seeded from the caller and copied back when split() ends
volatile Move bestMove;              // Seeded from the caller and copied back when split() ends
volatile int moveCount;              // Seeded from split()'s moveCount argument
volatile bool cutoff;                // Starts false; tested by Thread::cutoff_occurred()
};
/// ThreadBase struct is the base of the hierarchy from where we derive all the
@ -97,7 +50,6 @@ struct ThreadBase : public std::thread {
void wait_for(volatile const bool& b);
Mutex mutex;
Spinlock spinlock;
ConditionVariable sleepCondition;
volatile bool exit = false;
};
@ -112,22 +64,22 @@ struct Thread : public ThreadBase {
Thread();
virtual void idle_loop();
bool cutoff_occurred() const;
bool can_join(const SplitPoint* sp) const;
virtual void id_loop();
void split(Position& pos, Search::Stack* ss, Value alpha, Value beta, Value* bestValue, Move* bestMove,
Depth depth, int moveCount, MovePicker* movePicker, int nodeType, bool cutNode);
SplitPoint splitPoints[MAX_SPLITPOINTS_PER_THREAD];
Pawns::Table pawnsTable;
Material::Table materialTable;
Endgames endgames;
Position* activePosition;
size_t idx;
size_t idx, PVIdx;
int maxPly;
SplitPoint* volatile activeSplitPoint;
volatile size_t splitPointsSize;
volatile bool searching;
// Data per thread.
Position pos;
Search::RootMoveVector rootMoves;
Search::Stack stack[MAX_PLY+4];
HistoryStats History;
MovesStats Countermoves;
Depth rootDepth;
};
@ -138,6 +90,7 @@ struct MainThread : public Thread {
virtual void idle_loop();
void join();
volatile bool thinking = true; // Avoid a race with start_thinking()
std::bitset<MAX_THREADS> slavesMask;
};
struct TimerThread : public ThreadBase {
@ -161,10 +114,8 @@ struct ThreadPool : public std::vector<Thread*> {
MainThread* main() { return static_cast<MainThread*>(at(0)); }
void read_uci_options();
Thread* available_slave(const SplitPoint* sp) const;
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);
Depth minimumSplitDepth;
uint64_t nodes_searched();
TimerThread* timer;
};

View File

@ -22,6 +22,7 @@
#include "misc.h"
#include "search.h"
#include "thread.h"
/// The TimeManagement class computes the optimal time to think depending on
/// the maximum available time, the game move number and other parameters.
@ -32,7 +33,7 @@ public:
void pv_instability(double bestMoveChanges) { unstablePvFactor = 1 + bestMoveChanges; }
int available() const { return int(optimumTime * unstablePvFactor * 0.76); }
int maximum() const { return maximumTime; }
int elapsed() const { return int(Search::Limits.npmsec ? Search::RootPos.nodes_searched() : now() - start); }
int elapsed() const { return int(Search::Limits.npmsec ? Threads.nodes_searched() : now() - start); }
int64_t availableNodes; // When in 'nodes as time' mode