Simplify Skill implementation
Currently we handle UCI_Elo with a double randomization. This seems unnecessary and a bit convoluted. This patch removes the first randomization and unifies the two cases. Closes https://github.com/official-stockfish/Stockfish/pull/3769 No functional change. (ref: pull/3764/head^2)
parent
0e89d6e754
commit
ef4822aa8d
1
AUTHORS
1
AUTHORS
|
@ -194,6 +194,7 @@ tttak
|
||||||
Unai Corzo (unaiic)
|
Unai Corzo (unaiic)
|
||||||
Uri Blass (uriblass)
|
Uri Blass (uriblass)
|
||||||
Vince Negri (cuddlestmonkey)
|
Vince Negri (cuddlestmonkey)
|
||||||
|
xefoci7612
|
||||||
zz4032
|
zz4032
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -112,14 +112,22 @@ namespace {
|
||||||
return thisThread->state;
|
return thisThread->state;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skill structure is used to implement strength limit
|
// Skill structure is used to implement strength limit. If we have an uci_elo then
|
||||||
|
// we convert it to a suitable fractional skill level using anchoring to CCRL Elo
|
||||||
|
// (goldfish 1.13 = 2000) and a fit through Ordo derived Elo for match (TC 60+0.6)
|
||||||
|
// results spanning a wide range of k values.
|
||||||
struct Skill {
|
struct Skill {
|
||||||
explicit Skill(int l) : level(l) {}
|
Skill(int skill_level, int uci_elo) {
|
||||||
bool enabled() const { return level < 20; }
|
if (uci_elo)
|
||||||
bool time_to_pick(Depth depth) const { return depth == 1 + level; }
|
level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0);
|
||||||
|
else
|
||||||
|
level = double(skill_level);
|
||||||
|
}
|
||||||
|
bool enabled() const { return level < 20.0; }
|
||||||
|
bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
|
||||||
Move pick_best(size_t multiPV);
|
Move pick_best(size_t multiPV);
|
||||||
|
|
||||||
int level;
|
double level;
|
||||||
Move best = MOVE_NONE;
|
Move best = MOVE_NONE;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -243,10 +251,11 @@ void MainThread::search() {
|
||||||
Time.availableNodes += Limits.inc[us] - Threads.nodes_searched();
|
Time.availableNodes += Limits.inc[us] - Threads.nodes_searched();
|
||||||
|
|
||||||
Thread* bestThread = this;
|
Thread* bestThread = this;
|
||||||
|
Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0);
|
||||||
|
|
||||||
if ( int(Options["MultiPV"]) == 1
|
if ( int(Options["MultiPV"]) == 1
|
||||||
&& !Limits.depth
|
&& !Limits.depth
|
||||||
&& !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
|
&& !skill.enabled()
|
||||||
&& rootMoves[0].pv[0] != MOVE_NONE)
|
&& rootMoves[0].pv[0] != MOVE_NONE)
|
||||||
bestThread = Threads.get_best_thread();
|
bestThread = Threads.get_best_thread();
|
||||||
|
|
||||||
|
@ -311,19 +320,7 @@ void Thread::search() {
|
||||||
std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);
|
std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);
|
||||||
|
|
||||||
size_t multiPV = size_t(Options["MultiPV"]);
|
size_t multiPV = size_t(Options["MultiPV"]);
|
||||||
|
Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0);
|
||||||
// Pick integer skill levels, but non-deterministically round up or down
|
|
||||||
// such that the average integer skill corresponds to the input floating point one.
|
|
||||||
// UCI_Elo is converted to a suitable fractional skill level, using anchoring
|
|
||||||
// to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo
|
|
||||||
// for match (TC 60+0.6) results spanning a wide range of k values.
|
|
||||||
PRNG rng(now());
|
|
||||||
double floatLevel = Options["UCI_LimitStrength"] ?
|
|
||||||
std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
|
|
||||||
double(Options["Skill Level"]);
|
|
||||||
int intLevel = int(floatLevel) +
|
|
||||||
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
|
|
||||||
Skill skill(intLevel);
|
|
||||||
|
|
||||||
// When playing with strength handicap enable MultiPV search that we will
|
// When playing with strength handicap enable MultiPV search that we will
|
||||||
// use behind the scenes to retrieve a set of possible moves.
|
// use behind the scenes to retrieve a set of possible moves.
|
||||||
|
@ -1780,8 +1777,8 @@ moves_loop: // When in check, search starts here
|
||||||
// RootMoves are already sorted by score in descending order
|
// RootMoves are already sorted by score in descending order
|
||||||
Value topScore = rootMoves[0].score;
|
Value topScore = rootMoves[0].score;
|
||||||
int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg);
|
int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg);
|
||||||
int weakness = 120 - 2 * level;
|
|
||||||
int maxScore = -VALUE_INFINITE;
|
int maxScore = -VALUE_INFINITE;
|
||||||
|
double weakness = 120 - 2 * level;
|
||||||
|
|
||||||
// Choose best move. For each move score we add two terms, both dependent on
|
// Choose best move. For each move score we add two terms, both dependent on
|
||||||
// weakness. One is deterministic and bigger for weaker levels, and one is
|
// weakness. One is deterministic and bigger for weaker levels, and one is
|
||||||
|
@ -1789,8 +1786,8 @@ moves_loop: // When in check, search starts here
|
||||||
for (size_t i = 0; i < multiPV; ++i)
|
for (size_t i = 0; i < multiPV; ++i)
|
||||||
{
|
{
|
||||||
// This is our magic formula
|
// This is our magic formula
|
||||||
int push = ( weakness * int(topScore - rootMoves[i].score)
|
int push = int(( weakness * int(topScore - rootMoves[i].score)
|
||||||
+ delta * (rng.rand<unsigned>() % weakness)) / 128;
|
+ delta * (rng.rand<unsigned>() % int(weakness))) / 128);
|
||||||
|
|
||||||
if (rootMoves[i].score + push >= maxScore)
|
if (rootMoves[i].score + push >= maxScore)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue