
Merge remote-tracking branch 'remotes/nodchip/master' into trainer

pull/3472/head
noobpwnftw 2020-09-19 15:41:30 +08:00
commit 411adab149
57 changed files with 9136 additions and 1469 deletions

View File

@ -1,5 +1,5 @@
language: cpp
dist: bionic
dist: focal
matrix:
include:
@ -7,9 +7,9 @@ matrix:
compiler: gcc
addons:
apt:
packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl', 'libopenblas-dev']
packages: ['g++-multilib', 'valgrind', 'expect', 'curl', 'libopenblas-dev']
env:
- COMPILER=g++-8
- COMPILER=g++
- COMP=gcc
# - os: linux
@ -67,21 +67,11 @@ script:
- make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
# TODO avoid _mm_malloc
# - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
# - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
# TODO avoid _mm_malloc
# - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
# - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
# workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu
- if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
# start some basic learner CI
- export CXXFLAGS="-Werror"
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern learn; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern profile-learn; fi
- make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref
# compile only for some more advanced architectures (might not run in travis)
- make clean && make -j2 ARCH=x86-64-avx2 blas=yes build
- make clean && make -j2 ARCH=x86-64-avx2 build
- make clean && make -j2 ARCH=x86-64-bmi2 build
- make clean && make -j2 ARCH=x86-64-avx512 build
@ -98,18 +88,16 @@ script:
# Valgrind
#
- export CXXFLAGS="-O1 -fno-inline"
- if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
- if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
- make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind
- ../tests/instrumented.sh --valgrind-thread
#
# Sanitizer
#
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
- make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined
- make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread
#
# NNUE testing / TODO should work with debug=yes as well
#
# NNUE testing
- export CXXFLAGS="-O1 -fno-inline"
- if [ -x "$(command -v valgrind )" ]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern debug=no optimize=no learn > /dev/null && ../tests/instrumented_learn.sh --valgrind; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && LDFLAGS="-lstdc++fs" make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=no learn > /dev/null && ../tests/instrumented_learn.sh --sanitizer-undefined; fi
- make clean && make -j2 ARCH=x86-64-modern debug=no optimize=no build > /dev/null && ../tests/instrumented_learn.sh --valgrind
- make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented_learn.sh --sanitizer-undefined

View File

@ -7,6 +7,31 @@
## Overview
Stockfish NNUE is a port of a shogi neural network named NNUE (Efficiently Updatable Neural Network, with the acronym read in reverse) to Stockfish 11. To learn more about the Stockfish chess engine, look [here](stockfish.md) for an overview and [here](https://github.com/official-stockfish/Stockfish) for the official repository.
## Building
To compile:
```
make -jN ARCH=... build
```
To compile with profile-guided optimization (requires that the computer used for compilation supports the selected `ARCH`):
```
make -jN ARCH=... profile-build
```
`N` is the number of threads to use for compilation.
`ARCH` is one of:
`x86-64-vnni512`, `x86-64-vnni256`, `x86-64-avx512`, `x86-64-bmi2`, `x86-64-avx2`,
`x86-64-sse41-popcnt`, `x86-64-modern`, `x86-64-ssse3`, `x86-64-sse3-popcnt`,
`x86-64`, `x86-32-sse41-popcnt`, `x86-32-sse2`, `x86-32`, `ppc-64`, `ppc-32`,
`armv7`, `armv7-neon`, `armv8`, `apple-silicon`, `general-64`, `general-32`.
`ARCH` needs to be chosen based on the instruction set of the CPU that will run Stockfish. `x86-64-modern` will produce a binary that works on most common processors, but other options may increase performance on specific hardware.
Additional options:
- `blas=[yes/no]` - whether to link against an external BLAS library. Default is `no`. Using an external BLAS library may significantly improve learning performance; OpenBLAS is expected to be installed by default (see the example below).
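For reference, the CI configuration earlier in this diff exercises both a plain profile build and a BLAS-enabled build; typical invocations look like the following (thread counts are illustrative):
```
make -j2 ARCH=x86-64-modern profile-build
make -j2 ARCH=x86-64-avx2 blas=yes build
```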
## Training Guide
### Generating Training Data
To generate training data from the classical eval, use the `gensfen` command with the option `Use NNUE` set to `false`. The example given in the guide is generation in its simplest form; there are more options.
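A hedged sketch of such a session at the engine prompt (the option names `depth`, `loop` and `output_file_name` appear elsewhere in this diff; the values and output file name are illustrative, not the guide's own example):
```
uci
setoption name Use NNUE value false
setoption name Threads value 4
isready
gensfen depth 6 loop 10000 output_file_name trainingdata
```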
@ -45,7 +70,7 @@ Nets get saved in the "evalsave" folder.
- `lambda` is the weight given to the eval of the training data versus the win/draw/loss result: `lambda 1` puts all weight on the eval, `lambda 0` puts all weight on the WDL result, and values in between blend the two.
### Reinforcement Learning
If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with the setting `Use NNUE` set to true. Make sure that your previously trained network is in the eval folder and use the commands specified above. Make sure `SkipLoadingEval` is set to false, so that the generated data uses the neural net's eval: type `setoption name SkipLoadingEval value false` before the `isready` command. You should aim to generate fewer positions than in the first run, around 1/10 of the number generated the first time, and at a higher depth. Do the same for the validation data, again with a higher depth than in the last run.
If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with the setting `Use NNUE` set to `pure`. Make sure that your previously trained network is in the eval folder and use the commands specified above. Make sure `SkipLoadingEval` is set to false, so that the generated data uses the neural net's eval: type `setoption name SkipLoadingEval value false` before the `isready` command. You should aim to generate fewer positions than in the first run, around 1/10 of the number generated the first time, and at a higher depth. Do the same for the validation data, again with a higher depth than in the last run.
After you have generated the training data, move it into your training data folder and delete the older data so that the binary does not accidentally train on the same data again. Do the same for the validation data and rename it to val-1.bin to avoid confusion. Make sure the evalsave folder is empty. Then, using the same binary, type in the training commands shown above. Do __NOT__ set `SkipLoadingEval` to true; it must be false, or you will get a completely new network instead of a network trained with reinforcement learning. You should also set eval_save_interval to a number lower than the number of positions in your training data, perhaps also 1/10 of the original value. The validation file should be set to the new validation data, not the old data.
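A hedged sketch of the generation step in this reinforcement loop, using only options named above; the depth and position count are illustrative:
```
setoption name SkipLoadingEval value false
setoption name Use NNUE value pure
isready
gensfen depth 8 loop 1000 output_file_name rl-trainingdata
```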

View File

@ -39,8 +39,9 @@ PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
### Built-in benchmark for pgo-builds
PGO_TRAINING_DATA_FILE = pgo_training_data.bin
PGOBENCH = ./$(EXE) bench
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000
PGOGENSFEN = ./$(EXE) gensfen depth 6 loop 10000 output_file_name $(PGO_TRAINING_DATA_FILE)
### Source and object files
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
@ -55,8 +56,8 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
nnue/features/castling_right.cpp \
nnue/features/enpassant.cpp \
nnue/nnue_test_command.cpp \
extra/sfen_packer.cpp \
learn/learner.cpp \
learn/sfen_packer.cpp \
learn/learn.cpp \
learn/gensfen.cpp \
learn/convert.cpp \
learn/learning_tools.cpp \
@ -116,6 +117,7 @@ else
SUPPORTED_ARCH=false
endif
blas = no
optimize = yes
debug = no
sanitize = no
@ -135,20 +137,6 @@ vnni512 = no
neon = no
STRIP = strip
### BLAS libraries
ifeq ($(KERNEL),Linux)
BLASCXXFLAGS =
BLASLDFLAGS = -lopenblas
else
BLASCXXFLAGS = -I/mingw64/include/OpenBLAS
ifeq ($(debug),yes)
BLASLDFLAGS = -lopenblas -Wl,-static
else
BLASLDFLAGS = -lopenblas -Wl,-s -static
endif
endif
### 2.2 Architecture specific
ifeq ($(findstring x86,$(ARCH)),x86)
@ -325,9 +313,9 @@ endif
### ==========================================================================
### 3.1 Selecting compiler (default = gcc)
CXXFLAGS += -g -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(LEARNCXXFLAGS)
DEPENDFLAGS += -std=c++17
LDFLAGS += $(EXTRALDFLAGS) $(LEARNLDFLAGS)
CXXFLAGS += -g -Wall -Wcast-qual -fno-exceptions -std=c++17 -fopenmp -I. $(EXTRACXXFLAGS)
LDFLAGS += -fopenmp $(EXTRALDFLAGS)
DEPENDFLAGS += -std=c++17 -I.
ifeq ($(COMP),)
COMP=gcc
@ -479,14 +467,33 @@ ifneq ($(comp),mingw)
endif
endif
### 3.2.1 Debugging
### 3.2.1. BLAS libraries
ifeq ($(blas), yes)
LDFLAGS += -lopenblas
ifeq ($(KERNEL),Linux)
LDFLAGS +=
else
CXXFLAGS += -I/mingw64/include/OpenBLAS
ifeq ($(debug),yes)
LDFLAGS += -Wl,-static
else
LDFLAGS += -Wl,-s -static
endif
endif
CXXFLAGS += -DUSE_BLAS
endif
### 3.2.2 Debugging
ifeq ($(debug),no)
CXXFLAGS += -DNDEBUG
else
CXXFLAGS += -g
endif
### 3.2.2 Debugging with undefined behavior sanitizers
### 3.2.3 Debugging with undefined behavior sanitizers
ifneq ($(sanitize),no)
CXXFLAGS += -g3 -fsanitize=$(sanitize)
LDFLAGS += -fsanitize=$(sanitize)
@ -740,16 +747,17 @@ endif
config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
clang-profile-use clang-profile-make
build: config-sanity net
build: config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
profile-build: net config-sanity objclean profileclean
profile-build: config-sanity objclean profileclean
@echo ""
@echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@echo ""
@echo "Step 2/4. Running benchmark for pgo-build ..."
$(PGOBENCH) > /dev/null
$(PGOGENSFEN) > /dev/null
@echo ""
@echo "Step 3/4. Building optimized executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
@ -803,6 +811,7 @@ profileclean:
@rm -rf profdir
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda
@rm -f stockfish.profdata *.profraw
@rm -f $(PGO_TRAINING_DATA_FILE)
default:
help
@ -907,33 +916,7 @@ icc-profile-use:
EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
all
learn: config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp ' \
all
profile-learn: config-sanity objclean profileclean
@echo ""
@echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \
LEARNCXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp '
@echo ""
@echo "Step 2/4. Running benchmark for pgo-build ..."
$(PGOGENSFEN)
@echo ""
@echo "Step 3/4. Building optimized executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \
LEARNCXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp '
@echo ""
@echo "Step 4/4. Deleting profile data ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
rm generated_kifu.bin
.depend:
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@
-include .depend

View File

@ -1,8 +1,6 @@
#ifndef _EVALUATE_COMMON_H_
#define _EVALUATE_COMMON_H_
#if defined(EVAL_LEARN)
// Common header for modern evaluation functions.
#include <string>
@ -20,6 +18,4 @@ namespace Eval
double get_eta();
}
#endif // defined(EVAL_LEARN)
#endif // _EVALUATE_COMMON_H_

View File

@ -36,42 +36,30 @@
#include "uci.h"
#include "incbin/incbin.h"
// Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler).
// This macro invocation will declare the following three variables
// const unsigned char gEmbeddedNNUEData[]; // a pointer to the embedded data
// const unsigned char *const gEmbeddedNNUEEnd; // a marker to the end
// const unsigned int gEmbeddedNNUESize; // the size of the embedded file
// Note that this does not work with Microsoft Visual Studio.
#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
INCBIN(EmbeddedNNUE, EvalFileDefaultName);
#else
const unsigned char gEmbeddedNNUEData[1] = {0x0};
const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1];
const unsigned int gEmbeddedNNUESize = 1;
#endif
using namespace std;
using namespace Eval::NNUE;
namespace Eval {
bool useNNUE;
UseNNUEMode useNNUE;
string eval_file_loaded = "None";
/// init_NNUE() tries to load a nnue network at startup time, or when the engine
/// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
/// The name of the nnue network is always retrieved from the EvalFile option.
/// We search the given network in three locations: internally (the default
/// network may be embedded in the binary), in the active working directory and
/// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
/// variable to have the engine search in a special directory in their distro.
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
{
if (mode == "false")
return UseNNUEMode::False;
else if (mode == "true")
return UseNNUEMode::True;
else if (mode == "pure")
return UseNNUEMode::Pure;
return UseNNUEMode::False;
}
void init_NNUE() {
useNNUE = Options["Use NNUE"];
if (!useNNUE)
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
if (useNNUE == UseNNUEMode::False)
return;
string eval_file = string(Options["EvalFile"]);
@ -79,35 +67,17 @@ namespace Eval {
#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
#define stringify(x) stringify2(x)
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
vector<string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory };
vector<string> dirs = { "" , CommandLine::binaryDirectory };
#endif
for (string directory : dirs)
if (eval_file_loaded != eval_file)
{
if (directory != "<internal>")
{
ifstream stream(directory + eval_file, ios::binary);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
if (directory == "<internal>" && eval_file == EvalFileDefaultName)
{
// C++ way to prepare a buffer for a memory stream
class MemoryBuffer : public basic_streambuf<char> {
public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); }
};
MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
size_t(gEmbeddedNNUESize));
istream stream(&buffer);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
ifstream stream(directory + eval_file, ios::binary);
if (load_eval(eval_file, stream))
eval_file_loaded = eval_file;
}
}
@ -116,7 +86,7 @@ namespace Eval {
string eval_file = string(Options["EvalFile"]);
if (useNNUE && eval_file_loaded != eval_file)
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
@ -136,7 +106,7 @@ namespace Eval {
exit(EXIT_FAILURE);
}
if (useNNUE)
if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
@ -1014,32 +984,32 @@ make_v:
/// evaluation of the position from the point of view of the side to move.
Value Eval::evaluate(const Position& pos) {
if (Options["Training"]) {
return NNUE::evaluate(pos);
} else {
// Use classical eval if there is a large imbalance
// If there is a moderate imbalance, use classical eval with probability (1/8),
// as derived from the node counter.
bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
bool classical = !Eval::useNNUE
|| useClassical
|| (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
Value v = classical ? Evaluation<NO_TRACE>(pos).value() : NNUE::evaluate(pos);
if ( useClassical
&& Eval::useNNUE
&& abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
v = NNUE::evaluate(pos);
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
if (useNNUE == UseNNUEMode::Pure) {
return NNUE::evaluate(pos);
}
// Use classical eval if there is a large imbalance
// If there is a moderate imbalance, use classical eval with probability (1/8),
// as derived from the node counter.
bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
bool classical = (useNNUE == UseNNUEMode::False)
|| useClassical
|| (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
Value v = classical ? Evaluation<NO_TRACE>(pos).value()
: NNUE::evaluate(pos);
if ( useClassical
&& useNNUE != UseNNUEMode::False
&& abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
v = NNUE::evaluate(pos);
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
}
/// trace() is like evaluate(), but instead of returning a value, it returns
@ -1087,7 +1057,7 @@ std::string Eval::trace(const Position& pos) {
ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
if (Eval::useNNUE)
if (useNNUE != UseNNUEMode::False)
{
v = NNUE::evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;

View File

@ -26,11 +26,17 @@
class Position;
namespace Eval {
enum struct UseNNUEMode
{
False,
True,
Pure
};
std::string trace(const Position& pos);
Value evaluate(const Position& pos);
extern bool useNNUE;
extern UseNNUEMode useNNUE;
extern std::string eval_file_loaded;
void init_NNUE();
void verify_NNUE();

File diff suppressed because it is too large

View File

@ -1,432 +0,0 @@
#if defined (EVAL_LEARN)
#include "../misc.h"
#include "../position.h"
#include <sstream>
#include <fstream>
#include <cstring> // std::memset()
using namespace std;
// -----------------------------------
// stage compression/decompression
// -----------------------------------
// Class that handles bitstream
// useful when doing aspect encoding
struct BitStream
{
// Set the memory to store the data in advance.
// Assume that memory is cleared to 0.
void set_data(uint8_t* data_) { data = data_; reset(); }
// Get the pointer passed in set_data().
uint8_t* get_data() const { return data; }
// Get the cursor.
int get_cursor() const { return bit_cursor; }
// reset the cursor
void reset() { bit_cursor = 0; }
// Write 1bit to the stream.
// If b is non-zero, write out 1. If 0, write 0.
void write_one_bit(int b)
{
if (b)
data[bit_cursor / 8] |= 1 << (bit_cursor & 7);
++bit_cursor;
}
// Get 1 bit from the stream.
int read_one_bit()
{
int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
++bit_cursor;
return b;
}
// write n bits of data
// Data shall be written out from the lower order of d.
void write_n_bit(int d, int n)
{
for (int i = 0; i <n; ++i)
write_one_bit(d & (1 << i));
}
// read n bits of data
// Reverse conversion of write_n_bit().
int read_n_bit(int n)
{
int result = 0;
for (int i = 0; i < n; ++i)
result |= read_one_bit() ? (1 << i) : 0;
return result;
}
private:
// Next bit position to read/write.
int bit_cursor;
// data entity
uint8_t* data;
};
// Huffman coding
// * is simplified from mini encoding to make conversion easier.
//
// 1 box on the board (other than NO_PIECE) = 2 to 6 bits (+ 1-bit flag + 1-bit forward and backward)
// 1 piece of hand piece = 1-5bit (+ 1-bit flag + 1bit ahead and behind)
//
// empty xxxxx0 + 0 (none)
// step xxxx01 + 2 xxxx0 + 2
// incense xx0011 + 2 xx001 + 2
// Katsura xx1011 + 2 xx101 + 2
// silver xx0111 + 2 xx011 + 2
// Gold x01111 + 1 x0111 + 1 // Gold is valid and has no flags.
// corner 011111 + 2 01111 + 2
// Fly 111111 + 2 11111 + 2
//
// Assuming all pieces are on the board,
// Sky 81-40 pieces = 41 boxes = 41bit
// Walk 4bit*18 pieces = 72bit
// Incense 6bit*4 pieces = 24bit
// Katsura 6bit*4 pieces = 24bit
// Silver 6bit*4 pieces = 24bit
// Gold 6bit* 4 pieces = 24bit
// corner 8bit* 2 pieces = 16bit
// Fly 8bit* 2 pieces = 16bit
// -------
// 241bit + 1bit (turn) + 7bit × 2 (King's position after) = 256bit
//
// When the piece on the board moves to the hand piece, the piece on the board becomes empty, so the box on the board can be expressed with 1 bit,
// Since the hand piece can be expressed by 1 bit less than the piece on the board, the total number of bits does not change in the end.
// Therefore, in this expression, any aspect can be expressed by this bit number.
// It is a hand piece and no flag is required, but if you include this, the bit number of the piece on the board will be -1
// Since the total number of bits can be fixed, we will include this as well.
// Huffman Encoding
//
// Empty xxxxxxx0
// Pawn xxxxx001 + 1 bit (Side to move)
// Knight xxxxx011 + 1 bit (Side to move)
// Bishop xxxxx101 + 1 bit (Side to move)
// Rook xxxxx111 + 1 bit (Side to move)
struct HuffmanedPiece
{
int code; // how it will be coded
int bits; // How many bits do you have
};
HuffmanedPiece huffman_table[] =
{
{0b0000,1}, // NO_PIECE
{0b0001,4}, // PAWN
{0b0011,4}, // KNIGHT
{0b0101,4}, // BISHOP
{0b0111,4}, // ROOK
{0b1001,4}, // QUEEN
};
// Class for compressing/decompressing sfen
// sfen can be packed to 256bit (32bytes) by Huffman coding.
// This is proven by mini. The above is Huffman coding.
//
// Internal format = 1-bit turn + 7-bit king position *2 + piece on board (Huffman coding) + hand piece (Huffman coding)
// Side to move (White = 0, Black = 1) (1bit)
// White King Position (6 bits)
// Black King Position (6 bits)
// Huffman Encoding of the board
// Castling availability (1 bit x 4)
// En passant square (1 or 1 + 6 bits)
// Rule 50 (6 bits)
// Game play (8 bits)
//
// TODO(someone): Rename SFEN to FEN.
//
struct SfenPacker
{
// Pack sfen and store in data[32].
void pack(const Position& pos)
{
// cout << pos;
memset(data, 0, 32 /* 256bit */);
stream.set_data(data);
// turn
// Side to move.
stream.write_one_bit((int)(pos.side_to_move()));
// 7-bit positions for leading and trailing balls
// White king and black king, 6 bits for each.
for(auto c: Colors)
stream.write_n_bit(pos.king_square(c), 6);
// Write the pieces on the board other than the kings.
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
Piece pc = pos.piece_on(make_square(f, r));
if (type_of(pc) == KING)
continue;
write_board_piece_to_stream(pc);
}
}
// TODO(someone): Support chess960.
stream.write_one_bit(pos.can_castle(WHITE_OO));
stream.write_one_bit(pos.can_castle(WHITE_OOO));
stream.write_one_bit(pos.can_castle(BLACK_OO));
stream.write_one_bit(pos.can_castle(BLACK_OOO));
if (pos.ep_square() == SQ_NONE) {
stream.write_one_bit(0);
}
else {
stream.write_one_bit(1);
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
}
stream.write_n_bit(pos.state()->rule50, 6);
stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8);
assert(stream.get_cursor() <= 256);
}
// sfen packed by pack() (256bit = 32bytes)
// Or sfen to decode with unpack()
uint8_t *data; // uint8_t[32];
//private:
// Position::set_from_packed_sfen(uint8_t data[32]) I want to use these functions, so the line is bad, but I want to keep it public.
BitStream stream;
// Output the board pieces to stream.
void write_board_piece_to_stream(Piece pc)
{
// piece type
PieceType pr = type_of(pc);
auto c = huffman_table[pr];
stream.write_n_bit(c.code, c.bits);
if (pc == NO_PIECE)
return;
// first and second flag
stream.write_one_bit(color_of(pc));
}
// Read one board piece from stream
Piece read_board_piece_from_stream()
{
PieceType pr = NO_PIECE_TYPE;
int code = 0, bits = 0;
while (true)
{
code |= stream.read_one_bit() << bits;
++bits;
assert(bits <= 6);
for (pr = NO_PIECE_TYPE; pr <KING; ++pr)
if (huffman_table[pr].code == code
&& huffman_table[pr].bits == bits)
goto Found;
}
Found:;
if (pr == NO_PIECE_TYPE)
return NO_PIECE;
// first and second flag
Color c = (Color)stream.read_one_bit();
return make_piece(c, pr);
}
};
// -----------------------------------
// Add to Position class
// -----------------------------------
// Add a function that directly unpacks for speed. It's pretty tough.
// Write it by combining packer::unpack() and Position::set().
// If there is a problem with the passed phase and there is an error, non-zero is returned.
int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thread* th, bool mirror)
{
SfenPacker packer;
auto& stream = packer.stream;
// TODO: separate streams for writing and reading. Here we actually have to
// const_cast which is not safe in the long run.
stream.set_data(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&sfen)));
std::memset(this, 0, sizeof(Position));
std::memset(si, 0, sizeof(StateInfo));
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si;
// Active color
sideToMove = (Color)stream.read_one_bit();
pieceList[W_KING][0] = SQUARE_NB;
pieceList[B_KING][0] = SQUARE_NB;
// First the position of the ball
if (mirror)
{
for (auto c : Colors)
board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
}
else
{
for (auto c : Colors)
board[stream.read_n_bit(6)] = make_piece(c, KING);
}
// Piece placement
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
auto sq = make_square(f, r);
if (mirror) {
sq = flip_file(sq);
}
// it seems there are already balls
Piece pc;
if (type_of(board[sq]) != KING)
{
assert(board[sq] == NO_PIECE);
pc = packer.read_board_piece_from_stream();
}
else
{
pc = board[sq];
board[sq] = NO_PIECE; // put_piece() will catch ASSERT unless you remove it all.
}
// There may be no pieces, so skip in that case.
if (pc == NO_PIECE)
continue;
put_piece(Piece(pc), sq);
//cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl;
if (stream.get_cursor()> 256)
return 1;
//assert(stream.get_cursor() <= 256);
}
}
// Castling availability.
// TODO(someone): Support chess960.
st->castlingRights = 0;
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_H1); piece_on(rsq) != W_ROOK; --rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_A1); piece_on(rsq) != W_ROOK; ++rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_H1); piece_on(rsq) != B_ROOK; --rsq) {}
set_castling_right(BLACK, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_A1); piece_on(rsq) != B_ROOK; ++rsq) {}
set_castling_right(BLACK, rsq);
}
// En passant square. Ignore if no pawn capture is possible
if (stream.read_one_bit()) {
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
if (mirror) {
ep_square = flip_file(ep_square);
}
st->epSquare = ep_square;
if (!(attackers_to(st->epSquare) & pieces(sideToMove, PAWN))
|| !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))))
st->epSquare = SQ_NONE;
}
else {
st->epSquare = SQ_NONE;
}
// Halfmove clock
st->rule50 = static_cast<Square>(stream.read_n_bit(6));
// Fullmove number
gamePly = static_cast<Square>(stream.read_n_bit(8));
// Convert from fullmove starting from 1 to gamePly starting from 0,
// handle also common incorrect FEN with fullmove = 0.
gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);
assert(stream.get_cursor() <= 256);
chess960 = false;
thisThread = th;
set_state(st);
//std::cout << *this << std::endl;
assert(pos_is_ok());
return 0;
}
// Give the board, hand piece, and turn, and return the sfen.
//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_)
//{
// // Copy it to an internal structure and call sfen() if the conversion process depends only on it
// // Maybe it will be converted normally...
// Position pos;
//
// memcpy(pos.board, board, sizeof(Piece) * 81);
// memcpy(pos.hand, hands, sizeof(Hand) * 2);
// pos.sideToMove = turn;
// pos.gamePly = gamePly_;
//
// return pos.sfen();
//
// // Implementation of ↑ is beautiful, but slow.
// // This is a bottleneck when learning a large amount of game records, so write a function to unpack directly.
//}
// Get the packed sfen. Returns to the buffer specified in the argument.
void Position::sfen_pack(PackedSfen& sfen)
{
SfenPacker sp;
sp.data = (uint8_t*)&sfen;
sp.pack(*this);
}
//// Unpack the packed sfen. Returns an sfen string.
//std::string Position::sfen_unpack(const PackedSfen& sfen)
//{
// SfenPacker sp;
// sp.data = (uint8_t*)&sfen;
// return sp.unpack();
//}
#endif // USE_SFEN_PACKER

View File

@ -1,16 +1,19 @@
#if defined(EVAL_LEARN)
#include "convert.h"
#include "multi_think.h"
#include "uci.h"
#include "misc.h"
#include "thread.h"
#include "position.h"
#include "tt.h"
// evaluate header for learning
#include "../eval/evaluate_common.h"
#include "eval/evaluate_common.h"
#include "learn.h"
#include "multi_think.h"
#include "../uci.h"
#include "../syzygy/tbprobe.h"
#include "../misc.h"
#include "../thread.h"
#include "../position.h"
#include "../tt.h"
#include "extra/nnue_data_binpack_format.h"
#include "syzygy/tbprobe.h"
#include <sstream>
#include <fstream>
@ -119,7 +122,7 @@ namespace Learner
else if (token == "score") {
double score;
ss >> score;
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
// Training Formula - Issue #71 - nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
// Normalize to [0.0, 1.0].
score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
// Scale to [dest_score_min_value, dest_score_max_value].
@ -497,5 +500,107 @@ namespace Learner
ofs.close();
std::cout << "all done" << std::endl;
}
static inline const std::string plain_extension = ".plain";
static inline const std::string bin_extension = ".bin";
static inline const std::string binpack_extension = ".binpack";
static bool file_exists(const std::string& name)
{
std::ifstream f(name);
return f.good();
}
static bool ends_with(const std::string& lhs, const std::string& end)
{
if (end.size() > lhs.size()) return false;
return std::equal(end.rbegin(), end.rend(), lhs.rbegin());
}
static bool is_convert_of_type(
const std::string& input_path,
const std::string& output_path,
const std::string& expected_input_extension,
const std::string& expected_output_extension)
{
return ends_with(input_path, expected_input_extension)
&& ends_with(output_path, expected_output_extension);
}
using ConvertFunctionType = void(std::string inputPath, std::string outputPath, std::ios_base::openmode om);
static ConvertFunctionType* get_convert_function(const std::string& input_path, const std::string& output_path)
{
if (is_convert_of_type(input_path, output_path, plain_extension, bin_extension))
return binpack::convertPlainToBin;
if (is_convert_of_type(input_path, output_path, plain_extension, binpack_extension))
return binpack::convertPlainToBinpack;
if (is_convert_of_type(input_path, output_path, bin_extension, plain_extension))
return binpack::convertBinToPlain;
if (is_convert_of_type(input_path, output_path, bin_extension, binpack_extension))
return binpack::convertBinToBinpack;
if (is_convert_of_type(input_path, output_path, binpack_extension, plain_extension))
return binpack::convertBinpackToPlain;
if (is_convert_of_type(input_path, output_path, binpack_extension, bin_extension))
return binpack::convertBinpackToBin;
return nullptr;
}
static void convert(const std::string& input_path, const std::string& output_path, std::ios_base::openmode om)
{
if(!file_exists(input_path))
{
std::cerr << "Input file does not exist.\n";
return;
}
auto func = get_convert_function(input_path, output_path);
if (func != nullptr)
{
func(input_path, output_path, om);
}
else
{
std::cerr << "Conversion between files of these types is not supported.\n";
}
}
static void convert(const std::vector<std::string>& args)
{
if (args.size() < 2 || args.size() > 3)
{
std::cerr << "Invalid arguments.\n";
std::cerr << "Usage: convert from_path to_path [append]\n";
return;
}
const bool append = (args.size() == 3) && (args[2] == "append");
const std::ios_base::openmode openmode =
append
? std::ios_base::app
: std::ios_base::trunc;
convert(args[0], args[1], openmode);
}
void convert(istringstream& is)
{
std::vector<std::string> args;
while (true)
{
std::string token = "";
is >> token;
if (token == "")
break;
args.push_back(token);
}
convert(args);
}
}
#endif

View File

@ -0,0 +1,35 @@
#ifndef _CONVERT_H_
#define _CONVERT_H_
#include <vector>
#include <string>
#include <sstream>
namespace Learner {
void convert_bin_from_pgn_extract(
const std::vector<std::string>& filenames,
const std::string& output_file_name,
const bool pgn_eval_side_to_move,
const bool convert_no_eval_fens_as_score_zero);
void convert_bin(
const std::vector<std::string>& filenames,
const std::string& output_file_name,
const int ply_minimum,
const int ply_maximum,
const int interpolate_eval,
const int src_score_min_value,
const int src_score_max_value,
const int dest_score_min_value,
const int dest_score_max_value,
const bool check_invalid_fen,
const bool check_illegal_move);
void convert_plain(
const std::vector<std::string>& filenames,
const std::string& output_file_name);
void convert(std::istringstream& is);
}
#endif

View File

@ -1,17 +1,23 @@
#if defined(EVAL_LEARN)
#include "gensfen.h"
#include "../eval/evaluate_common.h"
#include "../misc.h"
#include "../nnue/evaluate_nnue_learner.h"
#include "../position.h"
#include "../syzygy/tbprobe.h"
#include "../thread.h"
#include "../tt.h"
#include "../uci.h"
#include "learn.h"
#include "packed_sfen.h"
#include "multi_think.h"
#include "../syzygy/tbprobe.h"
#include "misc.h"
#include "position.h"
#include "thread.h"
#include "tt.h"
#include "uci.h"
#include "eval/evaluate_common.h"
#include "extra/nnue_data_binpack_format.h"
#include "nnue/evaluate_nnue_learner.h"
#include "syzygy/tbprobe.h"
#include <chrono>
#include <climits>
#include <cmath>
@ -33,11 +39,107 @@ using namespace std;
namespace Learner
{
enum struct SfenOutputType
{
Bin,
Binpack
};
static bool write_out_draw_game_in_training_data_generation = false;
static bool detect_draw_by_consecutive_low_score = false;
static bool detect_draw_by_insufficient_mating_material = false;
static std::vector<std::string> bookStart;
static SfenOutputType sfen_output_type = SfenOutputType::Bin;
static bool ends_with(const std::string& lhs, const std::string& end)
{
if (end.size() > lhs.size()) return false;
return std::equal(end.rbegin(), end.rend(), lhs.rbegin());
}
static std::string filename_with_extension(const std::string& filename, const std::string& ext)
{
if (ends_with(filename, ext))
{
return filename;
}
else
{
return filename + "." + ext;
}
}
struct BasicSfenOutputStream
{
virtual void write(const PSVector& sfens) = 0;
virtual ~BasicSfenOutputStream() {}
};
struct BinSfenOutputStream : BasicSfenOutputStream
{
static constexpr auto openmode = ios::out | ios::binary | ios::app;
static inline const std::string extension = "bin";
BinSfenOutputStream(std::string filename) :
m_stream(filename_with_extension(filename, extension), openmode)
{
}
void write(const PSVector& sfens) override
{
m_stream.write(reinterpret_cast<const char*>(sfens.data()), sizeof(PackedSfenValue) * sfens.size());
}
~BinSfenOutputStream() override {}
private:
fstream m_stream;
};
struct BinpackSfenOutputStream : BasicSfenOutputStream
{
static constexpr auto openmode = ios::out | ios::binary | ios::app;
static inline const std::string extension = "binpack";
BinpackSfenOutputStream(std::string filename) :
m_stream(filename_with_extension(filename, extension), openmode)
{
}
void write(const PSVector& sfens) override
{
static_assert(sizeof(binpack::nodchip::PackedSfenValue) == sizeof(PackedSfenValue));
for(auto& sfen : sfens)
{
// The library uses a different but layout-compatible type.
binpack::nodchip::PackedSfenValue e;
std::memcpy(&e, &sfen, sizeof(binpack::nodchip::PackedSfenValue));
m_stream.addTrainingDataEntry(binpack::packedSfenValueToTrainingDataEntry(e));
}
}
~BinpackSfenOutputStream() override {}
private:
binpack::CompressedTrainingDataEntryWriter m_stream;
};
static std::unique_ptr<BasicSfenOutputStream> create_new_sfen_output(const std::string& filename)
{
switch(sfen_output_type)
{
case SfenOutputType::Bin:
return std::make_unique<BinSfenOutputStream>(filename);
case SfenOutputType::Binpack:
return std::make_unique<BinpackSfenOutputStream>(filename);
}
assert(false);
return nullptr;
}
// Helper class for exporting Sfen
struct SfenWriter
@ -55,7 +157,7 @@ namespace Learner
sfen_buffers_pool.reserve((size_t)thread_num * 10);
sfen_buffers.resize(thread_num);
output_file_stream.open(filename_, ios::out | ios::binary | ios::app);
output_file_stream = create_new_sfen_output(filename_);
filename = filename_;
finished = false;
@ -65,7 +167,7 @@ namespace Learner
{
finished = true;
file_worker_thread.join();
output_file_stream.close();
output_file_stream.reset();
#if defined(_DEBUG)
{
@ -134,9 +236,6 @@ namespace Learner
{
// Also output the current time to console.
sync_cout << endl << sfen_write_count << " sfens , at " << now_string() << sync_endl;
// This is enough for flush().
output_file_stream.flush();
};
while (!finished || sfen_buffers_pool.size())
@ -160,7 +259,7 @@ namespace Learner
{
for (auto& buf : buffers)
{
output_file_stream.write(reinterpret_cast<const char*>(buf->data()), sizeof(PackedSfenValue) * buf->size());
output_file_stream->write(*buf);
sfen_write_count += buf->size();
@ -171,8 +270,6 @@ namespace Learner
{
sfen_write_count_current_file = 0;
output_file_stream.close();
// Sequential number attached to the file
int n = (int)(sfen_write_count / save_every);
@ -180,7 +277,7 @@ namespace Learner
// Add ios::app in consideration of overwriting.
// (Depending on the operation, it may not be necessary.)
string new_filename = filename + "_" + std::to_string(n);
output_file_stream.open(new_filename, ios::out | ios::binary | ios::app);
output_file_stream = create_new_sfen_output(new_filename);
cout << endl << "output sfen file = " << new_filename << endl;
}
@ -214,7 +311,7 @@ namespace Learner
private:
fstream output_file_stream;
std::unique_ptr<BasicSfenOutputStream> output_file_stream;
// A new net is saved after every save_every sfens are processed.
uint64_t save_every = std::numeric_limits<uint64_t>::max();
@ -260,7 +357,8 @@ namespace Learner
// It must be 2**N because it will be used as the mask to calculate hash_index.
static_assert((GENSFEN_HASH_SIZE& (GENSFEN_HASH_SIZE - 1)) == 0);
MultiThinkGenSfen(int search_depth_min_, int search_depth_max_, SfenWriter& sw_) :
MultiThinkGenSfen(int search_depth_min_, int search_depth_max_, SfenWriter& sw_, const std::string& seed) :
MultiThink(seed),
search_depth_min(search_depth_min_),
search_depth_max(search_depth_max_),
sfen_writer(sw_)
@ -759,20 +857,6 @@ namespace Learner
break;
}
if (pos.count<ALL_PIECES>() <= 6) {
Tablebases::ProbeState probe_state;
Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &probe_state);
assert(wdl != Tablebases::WDLScore::WDLScoreNone);
if (wdl == Tablebases::WDLScore::WDLWin) {
flush_psv(1);
} else if (wdl == Tablebases::WDLScore::WDLLoss) {
flush_psv(-1);
} else {
flush_psv(0);
}
break;
}
{
auto [search_value, search_pv] = search(pos, depth, 1, nodes);
@ -819,6 +903,25 @@ namespace Learner
goto SKIP_SAVE;
}
// Look into the position hashtable to see if the same
// position was seen before.
// This is a good heuristic to exclude already seen
// positions without many false positives.
{
auto key = pos.key();
auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
auto old_key = hash[hash_index];
if (key == old_key)
{
goto SKIP_SAVE;
}
else
{
// Replace with the current key.
hash[hash_index] = key;
}
}
// Pack the current position into a packed sfen and save it into the buffer.
{
a_psv.emplace_back(PackedSfenValue());
@ -916,7 +1019,7 @@ namespace Learner
int write_maxply = 400;
// File name to write
string output_file_name = "generated_kifu.bin";
string output_file_name = "generated_kifu";
string token;
@ -927,6 +1030,9 @@ namespace Learner
// Add a random number to the end of the file name.
bool random_file_name = false;
std::string sfen_format;
std::string seed;
while (true)
{
token = "";
@ -980,10 +1086,26 @@ namespace Learner
is >> detect_draw_by_consecutive_low_score;
else if (token == "detect_draw_by_insufficient_mating_material")
is >> detect_draw_by_insufficient_mating_material;
else if (token == "sfen_format")
is >> sfen_format;
else if (token == "seed")
is >> seed;
else
cout << "Error! : Illegal token " << token << endl;
}
if (!sfen_format.empty())
{
if (sfen_format == "bin")
sfen_output_type = SfenOutputType::Bin;
else if (sfen_format == "binpack")
sfen_output_type = SfenOutputType::Binpack;
else
{
cout << "Unknown sfen format `" << sfen_format << "`. Using bin\n";
}
}
// If search depth2 is not set, leave it the same as search depth.
if (search_depth_max == INT_MIN)
search_depth_max = search_depth_min;
@ -994,7 +1116,7 @@ namespace Learner
{
// Give a random number to output_file_name at this point.
// Do not use std::random_device(); it always returns the same sequence on MinGW.
PRNG r(std::chrono::system_clock::now().time_since_epoch().count());
PRNG r(seed);
// Just in case, reassign the random numbers.
for (int i = 0; i < 10; ++i)
r.rand(1);
@ -1018,6 +1140,8 @@ namespace Learner
bookStart.push_back(line);
}
myfile.close();
} else {
bookStart.push_back(StartFEN);
}
}
std::cout << "gensfen : " << endl
@ -1048,12 +1172,30 @@ namespace Learner
Threads.main()->ponder = false;
// About Search::Limits
// Be careful because this member variable is global and affects other threads.
{
auto& limits = Search::Limits;
// Make the search equivalent to the "go infinite" command, so that time management does not interfere.
limits.infinite = true;
// Suppress PV output; it would only clutter the console.
limits.silent = true;
// Do not set a node limit here; it would be checked against the nodes accumulated across all threads.
limits.nodes = 0;
// The depth limit is instead taken from the argument passed to Learner::search().
limits.depth = 0;
}
// Create and execute threads as many as Options["Threads"].
{
SfenWriter sfen_writer(output_file_name, thread_num);
sfen_writer.set_save_interval(save_every);
MultiThinkGenSfen multi_think(search_depth_min, search_depth_max, sfen_writer);
MultiThinkGenSfen multi_think(search_depth_min, search_depth_max, sfen_writer, seed);
multi_think.nodes = nodes;
multi_think.set_loop_max(loop_max);
multi_think.eval_limit = eval_limit;
@ -1074,7 +1216,5 @@ namespace Learner
}
std::cout << "gensfen finished." << endl;
}
}
#endif

View File

@ -0,0 +1,14 @@
#ifndef _GENSFEN_H_
#define _GENSFEN_H_
#include "position.h"
#include <sstream>
namespace Learner {
// Automatic generation of training positions
void gen_sfen(Position& pos, std::istringstream& is);
}
#endif

View File

@ -7,7 +7,7 @@
// Floating point operation by 16bit type
// Assume that the float type code generated by the compiler is in IEEE 754 format and use it.
#include "../types.h"
#include "types.h"
namespace HalfFloat
{

File diff suppressed because it is too large

View File

@ -1,10 +1,6 @@
#ifndef _LEARN_H_
#define _LEARN_H_
#if defined(EVAL_LEARN)
#include <vector>
// ----------------------
// Floating point for learning
// ----------------------
@ -14,7 +10,7 @@
// Even if it is a double type, there is almost no difference in the way of convergence, so fix it to float.
// when using float
typedef float LearnFloatType;
using LearnFloatType = float;
// when using double
//typedef double LearnFloatType;
@ -36,107 +32,48 @@ typedef float LearnFloatType;
// ----------------------
// Definition of struct used in Learner
// ----------------------
#include "../position.h"
#include "packed_sfen.h"
#include "position.h"
#include <sstream>
#include <vector>
namespace Learner
{
// ----------------------
// Settings for learning
// ----------------------
// mini-batch size.
// Calculate the gradient by combining this number of positions.
// If you make it smaller, update_weights() is called more often and convergence is faster, but the gradient estimate is noisier.
// If you make it larger, update_weights() is called less often and convergence is slower, but the gradient estimate is more accurate.
// I don't think you need to change this value in most cases.
constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
// The number of positions to read from the file at one time. After reading this many, shuffle.
// A larger value is better, but this number x 40 bytes x 3 of memory is consumed (about 400MB x 3 for 10M positions).
// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
constexpr std::size_t LEARN_SFEN_READ_SIZE = 1000 * 1000 * 10;
// Saving interval of the evaluation function during learning: save each time this many positions have been learned.
// Needless to say, the longer the saving interval, the shorter the learning time.
// The folder name is incremented for each save, like 0/, 1/, 2/...
// By default, once every 1 billion positions.
constexpr std::size_t LEARN_EVAL_SAVE_INTERVAL = 1000000000ULL;
// Output rmse only once every this many iterations during learning.
// The rmse calculation runs in a single thread and takes some time, so reducing the output frequency helps.
constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;
//Structure in which PackedSfen and evaluation value are integrated
// If you write different contents for each option, it will be a problem when reusing the teacher game
// For the time being, write all the following members regardless of the options.
struct PackedSfenValue
{
// phase
PackedSfen sfen;
double calc_grad(Value shallow, const PackedSfenValue& psv);
// Evaluation value returned from Learner::search()
int16_t score;
// PV first move
// Used when finding the match rate with the teacher
uint16_t move;
// Trouble of the phase from the initial phase.
uint16_t gamePly;
// 1 if the player on this side ultimately wins the game. -1 if you are losing.
// 0 if a draw is reached.
// The draw is in the teacher position generation command gensfen,
// Only write if LEARN_GENSFEN_DRAW_RESULT is enabled.
int8_t game_result;
// When exchanging the file that wrote the teacher aspect with other people
//Because this structure size is not fixed, pad it so that it is 40 bytes in any environment.
uint8_t padding;
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
};
// Type that returns the reading line and the evaluation value at that time
// Used in Learner::search(), Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
// Phase array: PSVector stands for packed sfen vector.
typedef std::vector<PackedSfenValue> PSVector;
// So far, only Yaneura King 2018 Otafuku has this stub
// This stub is required if EVAL_LEARN is defined.
extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0);
extern Learner::ValueAndPV qsearch(Position& pos);
double calc_grad(Value shallow, const PackedSfenValue& psv);
void convert_bin_from_pgn_extract(
const std::vector<std::string>& filenames,
const std::string& output_file_name,
const bool pgn_eval_side_to_move,
const bool convert_no_eval_fens_as_score_zero);
void convert_bin(
const std::vector<std::string>& filenames,
const std::string& output_file_name,
const int ply_minimum,
const int ply_maximum,
const int interpolate_eval,
const int src_score_min_value,
const int src_score_max_value,
const int dest_score_min_value,
const int dest_score_max_value,
const bool check_invalid_fen,
const bool check_illegal_move);
void convert_plain(
const std::vector<std::string>& filenames,
const std::string& output_file_name);
// Learning from the generated game record
void learn(Position& pos, std::istringstream& is);
}
#endif
#endif // ifndef _LEARN_H_

View File

@ -1,8 +1,6 @@
#include "learning_tools.h"
#if defined (EVAL_LEARN)
#include "../misc.h"
#include "misc.h"
using namespace Eval;
@ -18,5 +16,3 @@ namespace EvalLearningTools
uint64_t Weight::eta1_epoch;
uint64_t Weight::eta2_epoch;
}
#endif

View File

@ -5,9 +5,7 @@
#include "learn.h"
#if defined (EVAL_LEARN)
#include "../misc.h" // PRNG , my_insertion_sort
#include "misc.h" // PRNG , my_insertion_sort
#include <array>
#include <cmath> // std::sqrt()
@ -98,5 +96,4 @@ namespace EvalLearningTools
};
}
#endif // defined (EVAL_LEARN)
#endif

View File

@ -1,10 +1,9 @@
#include "../types.h"
#include "multi_think.h"
#if defined(EVAL_LEARN)
#include "multi_think.h"
#include "../tt.h"
#include "../uci.h"
#include "tt.h"
#include "uci.h"
#include "types.h"
#include "search.h"
#include <thread>
@ -27,14 +26,13 @@ void MultiThink::go_think()
auto thread_num = (size_t)Options["Threads"];
// Secure end flag of worker thread
thread_finished.resize(thread_num);
threads_finished=0;
// start worker thread
for (size_t i = 0; i < thread_num; ++i)
{
thread_finished[i] = 0;
threads.push_back(std::thread([i, this]
{
{
// exhaust all processor threads.
WinProcGroup::bindThisThread(i);
@ -42,7 +40,7 @@ void MultiThink::go_think()
this->thread_worker(i);
// Set the end flag because the thread has ended
this->thread_finished[i] = 1;
this->threads_finished++;
}));
}
@ -56,11 +54,7 @@ void MultiThink::go_think()
// function to determine if all threads have finished
auto threads_done = [&]()
{
// returns false if no one is finished
for (auto& f : thread_finished)
if (!f)
return false;
return true;
return threads_finished == thread_num;
};
// Call back if the callback function is set.
@ -105,6 +99,3 @@ void MultiThink::go_think()
// Since the work itself may not have completed, output only that all threads have finished.
std::cout << "all threads are joined." << std::endl;
}
#endif // defined(EVAL_LEARN)

View File

@ -1,17 +1,18 @@
#ifndef _MULTI_THINK_
#define _MULTI_THINK_
#if defined(EVAL_LEARN)
#include "learn.h"
#include <functional>
#include <mutex>
#include "../misc.h"
#include "../learn/learn.h"
#include "../thread_win32_osx.h"
#include "misc.h"
#include "thread_win32_osx.h"
#include <atomic>
#include <limits>
#include <functional>
#include <mutex>
#include <string>
#include <cstdint>
// Used when learning from game records, generating self-play data, building an opening book, etc.
// Helper class used when multiple threads want to call Search::think() individually.
@ -20,10 +21,11 @@ struct MultiThink
{
static constexpr std::uint64_t LOOP_COUNT_FINISHED = std::numeric_limits<std::uint64_t>::max();
MultiThink() : prng(std::chrono::system_clock::now().time_since_epoch().count())
{
loop_count = 0;
}
MultiThink() : prng{}, loop_count(0) { }
MultiThink(std::uint64_t seed) : prng(seed), loop_count(0) { }
MultiThink(const std::string& seed) : prng(seed), loop_count(0) { }
// Call this function from the master thread, each thread will think,
// Return control when the thought ending condition is satisfied.
@ -94,10 +96,7 @@ private:
std::mutex loop_mutex;
// Thread end flag.
// vector<bool> may not be reflected properly when trying to rewrite from multiple threads...
typedef uint8_t Flag;
std::vector<Flag> thread_finished;
std::atomic<uint64_t> threads_finished;
};
// Mechanism to process task during idle time.
@ -150,6 +149,4 @@ protected:
std::mutex task_mutex;
};
#endif // defined(EVAL_LEARN) && defined(YANEURAOU_2018_OTAFUKU_ENGINE)
#endif

View File

@ -0,0 +1,46 @@
#ifndef _PACKED_SFEN_H_
#define _PACKED_SFEN_H_
#include <vector>
#include <cstdint>
namespace Learner {
// packed sfen
struct PackedSfen { std::uint8_t data[32]; };
// Structure in which PackedSfen and evaluation value are integrated
// Writing different contents depending on compile-time options would make reusing the training data difficult,
// so for now all of the following members are always written, regardless of options.
struct PackedSfenValue
{
// The packed position.
PackedSfen sfen;
// Evaluation value returned from Learner::search()
std::int16_t score;
// PV first move
// Used when finding the match rate with the teacher
std::uint16_t move;
// Ply count from the initial position.
std::uint16_t gamePly;
// 1 if the side to move ultimately wins the game, -1 if it loses,
// 0 if the game is a draw.
// Draws are written by the training data generation command gensfen
// only if LEARN_GENSFEN_DRAW_RESULT is enabled.
std::int8_t game_result;
// Padding so that this structure has a fixed size of 40 bytes in any environment,
// which matters when exchanging training data files with others.
std::uint8_t padding;
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
};
// Position array: PSVector stands for "packed sfen vector".
using PSVector = std::vector<PackedSfenValue>;
}
#endif

View File

@ -0,0 +1,402 @@
#include "sfen_packer.h"
#include "packed_sfen.h"
#include "misc.h"
#include "position.h"
#include <sstream>
#include <fstream>
#include <cstring> // std::memset()
using namespace std;
namespace Learner {
// Class that handles bitstream
// useful when encoding positions bit by bit
struct BitStream
{
// Set the memory to store the data in advance.
// Assume that memory is cleared to 0.
void set_data(std::uint8_t* data_) { data = data_; reset(); }
// Get the pointer passed in set_data().
uint8_t* get_data() const { return data; }
// Get the cursor.
int get_cursor() const { return bit_cursor; }
// reset the cursor
void reset() { bit_cursor = 0; }
// Write 1bit to the stream.
// If b is non-zero, write out 1. If 0, write 0.
void write_one_bit(int b)
{
if (b)
data[bit_cursor / 8] |= 1 << (bit_cursor & 7);
++bit_cursor;
}
// Get 1 bit from the stream.
int read_one_bit()
{
int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
++bit_cursor;
return b;
}
// write n bits of data
// The bits are written out starting from the low-order end of d.
void write_n_bit(int d, int n)
{
for (int i = 0; i < n; ++i)
write_one_bit(d & (1 << i));
}
// read n bits of data
// Reverse conversion of write_n_bit().
int read_n_bit(int n)
{
int result = 0;
for (int i = 0; i < n; ++i)
result |= read_one_bit() ? (1 << i) : 0;
return result;
}
private:
// Next bit position to read/write.
int bit_cursor;
// data entity
std::uint8_t* data;
};
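As an illustration only (not part of the patch), here is how a few values round-trip through the BitStream interface above; the buffer size and the written values are arbitrary.

#include <cassert>
#include <cstdint>
#include <cstring>

void bitstream_roundtrip() {
    std::uint8_t buffer[32];
    std::memset(buffer, 0, sizeof(buffer)); // BitStream assumes zero-cleared memory

    BitStream writer;
    writer.set_data(buffer);
    writer.write_one_bit(1);     // e.g. side to move
    writer.write_n_bit(52, 6);   // e.g. a king square, 6 bits
    writer.write_n_bit(5, 4);    // e.g. a 4-bit Huffman code

    BitStream reader;
    reader.set_data(buffer);     // set_data() resets the cursor
    assert(reader.read_one_bit() == 1);
    assert(reader.read_n_bit(6) == 52);
    assert(reader.read_n_bit(4) == 5);
}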
// Class for compressing/decompressing an sfen.
// An sfen can be packed into 256 bits (32 bytes) by Huffman coding,
// as originally demonstrated by mini; the encoding below is that Huffman coding.
//
// Internal format = 1-bit side to move + 6-bit king position x 2 + pieces on the board (Huffman coded)
// Side to move (White = 0, Black = 1) (1bit)
// White King Position (6 bits)
// Black King Position (6 bits)
// Huffman Encoding of the board
// Castling availability (1 bit x 4)
// En passant square (1 or 1 + 6 bits)
// Rule 50 (6 bits)
// Game ply (8 bits)
//
// TODO(someone): Rename SFEN to FEN.
//
struct SfenPacker
{
void pack(const Position& pos);
// sfen packed by pack() (256bit = 32bytes)
// Or sfen to decode with unpack()
uint8_t *data; // uint8_t[32];
BitStream stream;
// Output the board pieces to stream.
void write_board_piece_to_stream(Piece pc);
// Read one board piece from stream
Piece read_board_piece_from_stream();
};
// Huffman coding
// * simplified from the mini encoding to make conversion easier.
//
// One square on the board (other than NO_PIECE) = 2 to 6 bits (+ 1-bit promotion flag + 1-bit side)
// One piece in hand                              = 1 to 5 bits (+ 1-bit promotion flag + 1-bit side)
//
// empty   xxxxx0 + 0 (none)
// pawn    xxxx01 + 2   xxxx0 + 2
// lance   xx0011 + 2   xx001 + 2
// knight  xx1011 + 2   xx101 + 2
// silver  xx0111 + 2   xx011 + 2
// gold    x01111 + 1   x0111 + 1 // gold cannot promote, so it has no promotion flag
// bishop  011111 + 2   01111 + 2
// rook    111111 + 2   11111 + 2
//
// Assuming all pieces are on the board,
// empty   81 - 40 pieces = 41 squares = 41 bits
// pawn    4 bits * 18 pieces = 72 bits
// lance   6 bits *  4 pieces = 24 bits
// knight  6 bits *  4 pieces = 24 bits
// silver  6 bits *  4 pieces = 24 bits
// gold    6 bits *  4 pieces = 24 bits
// bishop  8 bits *  2 pieces = 16 bits
// rook    8 bits *  2 pieces = 16 bits
//                              -------
//         241 bits + 1 bit (side to move) + 7 bits × 2 (positions of both kings) = 256 bits
//
// When a board piece is captured into hand, its square becomes empty and can be expressed with 1 bit,
// and a piece in hand can be expressed with 1 bit less than a piece on the board, so the total number of bits does not change.
// Therefore, with this encoding, any position fits within this number of bits.
// A piece in hand does not need the promotion flag, but including it keeps its cost one bit below a board piece,
// which makes the total number of bits fixed, so it is included as well.
// Huffman Encoding
//
// Empty  xxxxxxx0
// Pawn   xxxxx001 + 1 bit (Color)
// Knight xxxxx011 + 1 bit (Color)
// Bishop xxxxx101 + 1 bit (Color)
// Rook   xxxxx111 + 1 bit (Color)
// Queen  xxxx1001 + 1 bit (Color)
struct HuffmanedPiece
{
int code; // code assigned to this piece type
int bits; // number of bits in the code
};
constexpr HuffmanedPiece huffman_table[] =
{
{0b0000,1}, // NO_PIECE
{0b0001,4}, // PAWN
{0b0011,4}, // KNIGHT
{0b0101,4}, // BISHOP
{0b0111,4}, // ROOK
{0b1001,4}, // QUEEN
};
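A hand-worked check of the 256-bit budget for chess (illustrative, not from the patch): with the table above, every non-king piece costs its 4-bit code plus one color bit, and the header/trailer fields follow the format listed in the SfenPacker comment.

#include <cstdio>

int main() {
    // Board cost for the standard starting position (kings are written separately).
    int empty  = 32 * 1;        // 32 empty squares, 1 bit each
    int pawns  = 16 * 5;        // 4-bit code + 1 color bit
    int minors = (4 + 4) * 5;   // knights + bishops
    int rooks  =  4 * 5;
    int queens =  2 * 5;
    int board  = empty + pawns + minors + rooks + queens;   // 182 bits

    int header  = 1 + 6 + 6;        // side to move + two king squares
    int trailer = 4 + 1 + 6 + 8;    // castling rights + ep flag (no ep square) + rule50 + game ply

    std::printf("%d bits used of 256\n", header + board + trailer);   // 214
    return 0;
}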
// Pack sfen and store in data[32].
void SfenPacker::pack(const Position& pos)
{
// cout << pos;
memset(data, 0, 32 /* 256bit */);
stream.set_data(data);
// turn
// Side to move.
stream.write_one_bit((int)(pos.side_to_move()));
// Positions of the two kings:
// white king and black king, 6 bits each.
for(auto c: Colors)
stream.write_n_bit(pos.king_square(c), 6);
// Write the pieces on the board other than the kings.
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
Piece pc = pos.piece_on(make_square(f, r));
if (type_of(pc) == KING)
continue;
write_board_piece_to_stream(pc);
}
}
// TODO(someone): Support chess960.
stream.write_one_bit(pos.can_castle(WHITE_OO));
stream.write_one_bit(pos.can_castle(WHITE_OOO));
stream.write_one_bit(pos.can_castle(BLACK_OO));
stream.write_one_bit(pos.can_castle(BLACK_OOO));
if (pos.ep_square() == SQ_NONE) {
stream.write_one_bit(0);
}
else {
stream.write_one_bit(1);
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
}
stream.write_n_bit(pos.state()->rule50, 6);
stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8);
assert(stream.get_cursor() <= 256);
}
// Output the board pieces to stream.
void SfenPacker::write_board_piece_to_stream(Piece pc)
{
// piece type
PieceType pr = type_of(pc);
auto c = huffman_table[pr];
stream.write_n_bit(c.code, c.bits);
if (pc == NO_PIECE)
return;
// color flag (which side the piece belongs to)
stream.write_one_bit(color_of(pc));
}
// Read one board piece from stream
Piece SfenPacker::read_board_piece_from_stream()
{
PieceType pr = NO_PIECE_TYPE;
int code = 0, bits = 0;
while (true)
{
code |= stream.read_one_bit() << bits;
++bits;
assert(bits <= 6);
for (pr = NO_PIECE_TYPE; pr < KING; ++pr)
if (huffman_table[pr].code == code
&& huffman_table[pr].bits == bits)
goto Found;
}
Found:;
if (pr == NO_PIECE_TYPE)
return NO_PIECE;
// color flag (which side the piece belongs to)
Color c = (Color)stream.read_one_bit();
return make_piece(c, pr);
}
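A small sketch (assuming the project headers and the public SfenPacker members above) showing that a single piece written by write_board_piece_to_stream() comes back unchanged from read_board_piece_from_stream():

#include <cassert>
#include <cstdint>

void piece_roundtrip(SfenPacker& packer) {
    std::uint8_t buf[32] = {};                                       // zero-cleared, as BitStream expects
    packer.stream.set_data(buf);
    packer.write_board_piece_to_stream(make_piece(BLACK, KNIGHT));   // 4-bit code + 1 color bit

    packer.stream.set_data(buf);                                     // reset the cursor for reading
    assert(packer.read_board_piece_from_stream() == make_piece(BLACK, KNIGHT));
}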
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror)
{
SfenPacker packer;
auto& stream = packer.stream;
// TODO: separate streams for writing and reading. Here we actually have to
// const_cast which is not safe in the long run.
stream.set_data(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&sfen)));
pos.clear();
std::memset(si, 0, sizeof(StateInfo));
std::fill_n(&pos.pieceList[0][0], sizeof(pos.pieceList) / sizeof(Square), SQ_NONE);
pos.st = si;
// Active color
pos.sideToMove = (Color)stream.read_one_bit();
pos.pieceList[W_KING][0] = SQUARE_NB;
pos.pieceList[B_KING][0] = SQUARE_NB;
// First, the positions of the kings
if (mirror)
{
for (auto c : Colors)
pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
}
else
{
for (auto c : Colors)
pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
}
// Piece placement
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
auto sq = make_square(f, r);
if (mirror) {
sq = flip_file(sq);
}
// a king has already been placed on this square
Piece pc;
if (type_of(pos.board[sq]) != KING)
{
assert(pos.board[sq] == NO_PIECE);
pc = packer.read_board_piece_from_stream();
}
else
{
pc = pos.board[sq];
// put_piece() would trip an assert unless the king is removed first.
pos.board[sq] = NO_PIECE;
}
// There may be no pieces, so skip in that case.
if (pc == NO_PIECE)
continue;
pos.put_piece(Piece(pc), sq);
if (stream.get_cursor() > 256)
return 1;
//assert(stream.get_cursor() <= 256);
}
}
// Castling availability.
// TODO(someone): Support chess960.
pos.st->castlingRights = 0;
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_H1); pos.piece_on(rsq) != W_ROOK; --rsq) {}
pos.set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_A1); pos.piece_on(rsq) != W_ROOK; ++rsq) {}
pos.set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_H1); pos.piece_on(rsq) != B_ROOK; --rsq) {}
pos.set_castling_right(BLACK, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_A1); pos.piece_on(rsq) != B_ROOK; ++rsq) {}
pos.set_castling_right(BLACK, rsq);
}
// En passant square. Ignore if no pawn capture is possible
if (stream.read_one_bit()) {
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
if (mirror) {
ep_square = flip_file(ep_square);
}
pos.st->epSquare = ep_square;
if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
|| !(pos.pieces(~pos.sideToMove, PAWN) & (pos.st->epSquare + pawn_push(~pos.sideToMove))))
pos.st->epSquare = SQ_NONE;
}
else {
pos.st->epSquare = SQ_NONE;
}
// Halfmove clock
pos.st->rule50 = static_cast<Square>(stream.read_n_bit(6));
// Fullmove number
pos.gamePly = static_cast<Square>(stream.read_n_bit(8));
// Convert from fullmove starting from 1 to gamePly starting from 0,
// handle also common incorrect FEN with fullmove = 0.
pos.gamePly = std::max(2 * (pos.gamePly - 1), 0) + (pos.sideToMove == BLACK);
assert(stream.get_cursor() <= 256);
pos.chess960 = false;
pos.thisThread = th;
pos.set_state(pos.st);
assert(pos.pos_is_ok());
return 0;
}
PackedSfen sfen_pack(Position& pos)
{
PackedSfen sfen;
SfenPacker sp;
sp.data = (uint8_t*)&sfen;
sp.pack(pos);
return sfen;
}
}
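For the game-ply handling, a worked round-trip (illustrative only): pack() stores the fullmove counter in 8 bits, and set_from_packed_sfen() reconstructs the ply from it. The loop below assumes a game started from the initial position, so the ply parity gives the side to move.

#include <algorithm>
#include <cassert>

int main() {
    for (int gamePly = 0; gamePly < 200; ++gamePly) {
        int blackToMove = gamePly & 1;                                // parity of the ply from the start position
        int fullmove = 1 + (gamePly - blackToMove) / 2;               // what pack() writes (8 bits)
        int restored = std::max(2 * (fullmove - 1), 0) + blackToMove; // what set_from_packed_sfen() computes
        assert(restored == gamePly);
    }
    return 0;
}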

View File

@ -0,0 +1,20 @@
#ifndef _SFEN_PACKER_H_
#define _SFEN_PACKER_H_
#include "types.h"
#include "learn/packed_sfen.h"
#include <cstdint>
class Position;
struct StateInfo;
class Thread;
namespace Learner {
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
PackedSfen sfen_pack(Position& pos);
}
#endif

View File

@ -19,6 +19,7 @@
#ifndef MISC_H_INCLUDED
#define MISC_H_INCLUDED
#include <algorithm>
#include <cassert>
#include <chrono>
#include <functional>
@ -28,6 +29,7 @@
#include <vector>
#include <utility>
#include <cmath>
#include <cctype>
#include "types.h"
@ -83,6 +85,19 @@ std::ostream& operator<<(std::ostream&, SyncCout);
/// For further analysis see
/// <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>
static uint64_t string_hash(const std::string& str)
{
uint64_t h = 525201411107845655ull;
for (auto c : str) {
h ^= static_cast<uint64_t>(c);
h *= 0x5bd1e9955bd1e995ull;
h ^= h >> 47;
}
return h;
}
class PRNG {
uint64_t s;
@ -94,7 +109,9 @@ class PRNG {
}
public:
PRNG() { set_seed_from_time(); }
PRNG(uint64_t seed) : s(seed) { assert(seed); }
PRNG(const std::string& seed) { set_seed(seed); }
template<typename T> T rand() { return T(rand64()); }
@ -107,6 +124,28 @@ public:
// Return the random seed used internally.
uint64_t get_seed() const { return s; }
void set_seed(uint64_t seed) { s = seed; }
void set_seed_from_time()
{
set_seed(std::chrono::system_clock::now().time_since_epoch().count());
}
void set_seed(const std::string& str)
{
if (str.empty())
{
set_seed_from_time();
}
else if (std::all_of(str.begin(), str.end(), [](char c) { return std::isdigit(c);} )) {
set_seed(std::stoull(str));
}
else
{
set_seed(string_hash(str));
}
}
};
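A short usage sketch for the string-seed constructors added above (illustrative, assumes misc.h is included): an all-digit string is parsed as a number, any other string is hashed with string_hash(), and an empty string falls back to a time-based seed.

#include <iostream>
#include <string>

void seed_examples() {
    PRNG a("12345");       // all digits  -> set_seed(12345)
    PRNG b("run #7");      // other text  -> set_seed(string_hash("run #7"))
    PRNG c("");            // empty       -> seeded from the system clock
    std::cout << a.rand<unsigned>() << ' '
              << b.rand<unsigned>() << ' '
              << c.rand<unsigned>() << '\n';
}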
// Display a random seed. (For debugging)
@ -166,7 +205,9 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
// async version of PRNG
struct AsyncPRNG
{
AsyncPRNG() : prng() { }
AsyncPRNG(uint64_t seed) : prng(seed) { assert(seed); }
AsyncPRNG(const std::string& seed) : prng(seed) { }
// [ASYNC] Extract one random number.
template<typename T> T rand() {
std::unique_lock<std::mutex> lk(mutex);

View File

@ -1,7 +1,25 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of input features and network structure used in NNUE evaluation function
#ifndef HALFKP_CR_EP_256X2_32_32_H
#define HALFKP_CR_EP_256X2_32_32_H
#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
@ -12,31 +30,28 @@
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
Features::EnPassant>;
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
Features::EnPassant>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
namespace Layers {
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
} // namespace Layers
using Network = Layers::OutputLayer;
using Network = Layers::OutputLayer;
} // namespace Eval::NNUE
} // namespace NNUE
} // namespace Eval
#endif // HALFKP_CR_EP_256X2_32_32_H
#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED

View File

@ -0,0 +1,37 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
#include "../features/castling_right.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace Eval::NNUE
#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED

View File

@ -1,4 +1,5 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_256X2_32_32_H
#define K_P_256X2_32_32_H

View File

@ -1,7 +1,5 @@
// Code for learning NNUE evaluation function
#if defined(EVAL_LEARN)
#include <random>
#include <fstream>
#include <filesystem>
@ -238,5 +236,3 @@ double get_eta() {
}
} // namespace Eval
#endif // defined(EVAL_LEARN)

View File

@ -3,8 +3,6 @@
#ifndef _EVALUATE_NNUE_LEARNER_H_
#define _EVALUATE_NNUE_LEARNER_H_
#if defined(EVAL_LEARN)
#include "../learn/learn.h"
namespace Eval {
@ -43,6 +41,4 @@ void FinalizeNet();
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -1,69 +1,40 @@
//Definition of input feature quantity K of NNUE evaluation function
//Definition of input feature quantity CastlingRight of NNUE evaluation function
#include "castling_right.h"
#include "index_list.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Get a list of indices with a value of 1 among the features
void CastlingRight::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
namespace Features {
int castling_rights = pos.state()->castlingRights;
int relative_castling_rights;
if (perspective == WHITE) {
relative_castling_rights = castling_rights;
}
else {
// Invert the perspective.
relative_castling_rights = ((castling_rights & 3) << 2)
& ((castling_rights >> 2) & 3);
}
// Get a list of indices with a value of 1 among the features
void CastlingRight::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
int castling_rights = pos.state()->castlingRights;
int relative_castling_rights;
if (perspective == WHITE) {
relative_castling_rights = castling_rights;
}
else {
// Invert the perspective.
relative_castling_rights = ((castling_rights & 3) << 2)
& ((castling_rights >> 2) & 3);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if (relative_castling_rights & (i << 1)) {
active->push_back(i);
}
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if (relative_castling_rights & (1 << i)) {
active->push_back(i);
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* /* added */) {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
}
int previous_castling_rights = pos.state()->previous->castlingRights;
int current_castling_rights = pos.state()->castlingRights;
int relative_previous_castling_rights;
int relative_current_castling_rights;
if (perspective == WHITE) {
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
}
else {
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
& ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
& ((current_castling_rights >> 2) & 3);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (i << 1)) &&
(relative_current_castling_rights & (i << 1)) == 0) {
removed->push_back(i);
}
}
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
} // namespace Eval::NNUE::Features

View File

@ -1,4 +1,4 @@
//Definition of input feature quantity K of NNUE evaluation function
//Definition of input feature quantity CastlingRight of NNUE evaluation function
#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
#define _NNUE_FEATURES_CASTLING_RIGHT_H_
@ -6,39 +6,30 @@
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
class CastlingRight {
public:
// feature quantity name
static constexpr const char* kName = "CastlingRight";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x913968AAu;
// number of feature dimensions
static constexpr IndexType kDimensions = 4;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 4;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
namespace Features {
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Feature K: King position
class CastlingRight {
public:
// feature quantity name
static constexpr const char* kName = "CastlingRight";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x913968AAu;
// number of feature dimensions
static constexpr IndexType kDimensions = 4;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 4;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
} // namespace Eval::NNUE::Features
#endif

View File

@ -1,43 +1,30 @@
//Definition of input feature quantity K of NNUE evaluation function
//Definition of input feature quantity EnPassant of NNUE evaluation function
#include "enpassant.h"
#include "index_list.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Get a list of indices with a value of 1 among the features
void EnPassant::AppendActiveIndices(
const Position& pos, Color /* perspective */, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
namespace Features {
auto epSquare = pos.state()->epSquare;
if (epSquare == SQ_NONE) {
return;
}
auto file = file_of(epSquare);
active->push_back(file);
}
// Get a list of indices with a value of 1 among the features
void EnPassant::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
}
auto epSquare = pos.state()->epSquare;
if (epSquare == SQ_NONE) {
return;
}
if (perspective == BLACK) {
epSquare = flip_rank(epSquare);
}
auto file = file_of(epSquare);
active->push_back(file);
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
} // namespace Eval::NNUE::Features

View File

@ -1,4 +1,4 @@
//Definition of input feature quantity K of NNUE evaluation function
//Definition of input feature quantity EnPassant of NNUE evaluation function
#ifndef _NNUE_FEATURES_ENPASSANT_H_
#define _NNUE_FEATURES_ENPASSANT_H_
@ -6,39 +6,30 @@
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
class EnPassant {
public:
// feature quantity name
static constexpr const char* kName = "EnPassant";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x02924F91u;
// number of feature dimensions
static constexpr IndexType kDimensions = 8;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 1;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
namespace Features {
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Feature K: King position
class EnPassant {
public:
// feature quantity name
static constexpr const char* kName = "EnPassant";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x02924F91u;
// number of feature dimensions
static constexpr IndexType kDimensions = 8;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 1;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
} // namespace Eval::NNUE::Features
#endif

View File

@ -105,9 +105,20 @@ namespace Eval::NNUE::Features {
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
break;
case TriggerEvent::kAnyKingMoved:
reset[perspective] = type_of(dp.piece[0]) == KING;
break;
case TriggerEvent::kAnyPieceMoved:
reset[perspective] = true;
break;
default:
assert(false);
break;

View File

@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
// Trigger to perform full calculations instead of difference only
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate all when own ball moves
kEnemyKingMoved, // do all calculations when enemy balls move
kAnyKingMoved, // do all calculations if either ball moves
kAnyPieceMoved, // always do all calculations
kFriendKingMoved, // calculate full evaluation when own king moves
kEnemyKingMoved, // calculate full evaluation when opponent king moves
kAnyKingMoved, // calculate full evaluation when any king moves
kAnyPieceMoved, // always calculate full evaluation
};
enum class Side {

View File

@ -50,7 +50,7 @@ namespace Eval::NNUE::Features {
}
private:
T values_[MaxSize];
T values_[MaxSize] = {};
std::size_t size_ = 0;
};

View File

@ -32,19 +32,11 @@ void K::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
Color king_color;
if (dp.piece[0] == Piece::W_KING) {
king_color = WHITE;
if (type_of(dp.piece[0]) == KING)
{
removed->push_back(MakeIndex(perspective, dp.from[0], color_of(dp.piece[0])));
added->push_back(MakeIndex(perspective, dp.to[0], color_of(dp.piece[0])));
}
else if (dp.piece[0] == Piece::B_KING) {
king_color = BLACK;
}
else {
return;
}
removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
added->push_back(MakeIndex(perspective, dp.to[0], king_color));
}
} // namespace Features

View File

@ -22,7 +22,7 @@
#define NNUE_ARCHITECTURE_H_INCLUDED
// Defines the network structure
#include "architectures/halfkp_256x2-32-32.h"
#include "architectures/halfkp-cr-ep_256x2-32-32.h"
namespace Eval::NNUE {

View File

@ -1,4 +1,4 @@
/*
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
@ -193,6 +193,12 @@ namespace Eval::NNUE {
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 = _mm256_loadA_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
@ -204,6 +210,12 @@ namespace Eval::NNUE {
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
@ -224,6 +236,12 @@ namespace Eval::NNUE {
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
@ -233,12 +251,19 @@ namespace Eval::NNUE {
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
@ -255,44 +280,54 @@ namespace Eval::NNUE {
void RefreshAccumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef TILING
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef TILING
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
std::memset(acc, 0, kNumRegs * sizeof(vec_t));
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
#endif
}
#if defined(USE_MMX)
@ -307,86 +342,95 @@ namespace Eval::NNUE {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
#ifdef TILING
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
#ifdef TILING
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
for (Color perspective : { WHITE, BLACK }) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
if (reset[perspective]) {
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
std::memset(acc, 0, kNumRegs * sizeof(vec_t));
}
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
#else
for (Color perspective : { WHITE, BLACK }) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
if (reset[perspective]) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
const IndexType offset = kHalfDimensions * index;
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
const IndexType offset = kHalfDimensions * index;
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
#endif
}
#if defined(USE_MMX)
_mm_empty();
#endif
#else
for (Color perspective : { WHITE, BLACK }) {
if (reset[perspective]) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
}
}
#endif
accumulator.computed_accumulation = true;
}

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_H_
#define _NNUE_TRAINER_H_
#if defined(EVAL_LEARN)
#include "../nnue_common.h"
#include "../features/index_list.h"
@ -120,6 +118,4 @@ std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#if defined(EVAL_LEARN)
#include "../../learn/learn.h"
#include "../layers/affine_transform.h"
#include "trainer.h"
@ -196,7 +194,7 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
weights_(),
biases_diff_(),
weights_diff_(),
momentum_(0.1),
momentum_(0.0),
learning_rate_scale_(1.0) {
DequantizeParameters();
}
@ -296,6 +294,4 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
#define _NNUE_TRAINER_CLIPPED_RELU_H_
#if defined(EVAL_LEARN)
#include "../../learn/learn.h"
#include "../layers/clipped_relu.h"
#include "trainer.h"
@ -137,6 +135,4 @@ class Trainer<Layers::ClippedReLU<PreviousLayer>> {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
#define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
#if defined(EVAL_LEARN)
#include "../../learn/learn.h"
#include "../nnue_feature_transformer.h"
#include "trainer.h"
@ -234,7 +232,7 @@ class Trainer<FeatureTransformer> {
biases_(),
weights_(),
biases_diff_(),
momentum_(0.1),
momentum_(0.0),
learning_rate_scale_(1.0) {
min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
@ -372,6 +370,4 @@ class Trainer<FeatureTransformer> {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
#define _NNUE_TRAINER_INPUT_SLICE_H_
#if defined(EVAL_LEARN)
#include "../../learn/learn.h"
#include "../layers/input_slice.h"
#include "trainer.h"
@ -246,6 +244,4 @@ class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -3,8 +3,6 @@
#ifndef _NNUE_TRAINER_SUM_H_
#define _NNUE_TRAINER_SUM_H_
#if defined(EVAL_LEARN)
#include "../../learn/learn.h"
#include "../layers/sum.h"
#include "trainer.h"
@ -185,6 +183,4 @@ class Trainer<Layers::Sum<PreviousLayer>> {
} // namespace Eval
#endif // defined(EVAL_LEARN)
#endif

View File

@ -32,6 +32,9 @@
#include "uci.h"
#include "syzygy/tbprobe.h"
#include "learn/packed_sfen.h"
#include "learn/sfen_packer.h"
using std::string;
namespace Zobrist {
@ -754,7 +757,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
else
st->nonPawnMaterial[them] -= PieceValue[MG][captured];
if (Eval::useNNUE)
if (Eval::useNNUE != Eval::UseNNUEMode::False)
{
dp.dirty_num = 2; // 1 piece moved, 1 piece captured
dp.piece[1] = captured;
@ -798,7 +801,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Move the piece. The tricky Chess960 castling is handled earlier
if (type_of(m) != CASTLING)
{
if (Eval::useNNUE)
if (Eval::useNNUE != Eval::UseNNUEMode::False)
{
dp.piece[0] = pc;
dp.from[0] = from;
@ -829,7 +832,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
remove_piece(to);
put_piece(promotion, to);
if (Eval::useNNUE)
if (Eval::useNNUE != Eval::UseNNUEMode::False)
{
// Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
dp.to[0] = SQ_NONE;
@ -967,7 +970,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
if (Do && Eval::useNNUE)
if (Do && Eval::useNNUE != Eval::UseNNUEMode::False)
{
auto& dp = st->dirtyPiece;
dp.piece[0] = make_piece(us, KING);
@ -996,7 +999,7 @@ void Position::do_null_move(StateInfo& newSt) {
assert(!checkers());
assert(&newSt != st);
if (Eval::useNNUE)
if (Eval::useNNUE != Eval::UseNNUEMode::False)
{
std::memcpy(&newSt, st, sizeof(StateInfo));
}
@ -1344,3 +1347,35 @@ bool Position::pos_is_ok() const {
return true;
}
// A function that unpacks directly, for speed. It is fairly involved;
// it is written by combining the packer's unpack code and Position::set().
// If there is a problem with the passed position data and an error occurs, non-zero is returned.
int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th, bool mirror)
{
return Learner::set_from_packed_sfen(*this, sfen, si, th, mirror);
}
// Given the board, pieces in hand, and side to move, return the sfen.
//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_)
//{
// // Copy it to an internal structure and call sfen() if the conversion process depends only on it
// // Maybe it will be converted normally...
// Position pos;
//
// memcpy(pos.board, board, sizeof(Piece) * 81);
// memcpy(pos.hand, hands, sizeof(Hand) * 2);
// pos.sideToMove = turn;
// pos.gamePly = gamePly_;
//
// return pos.sfen();
//
// // Implementation of ↑ is beautiful, but slow.
// // This is a bottleneck when learning a large amount of game records, so write a function to unpack directly.
//}
// Get the packed sfen. The result is written to the buffer given as the argument.
void Position::sfen_pack(Learner::PackedSfen& sfen)
{
sfen = Learner::sfen_pack(*this);
}
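An illustrative round-trip through the two helpers above (not part of the patch; assumes the usual Stockfish headers and a valid Thread pointer):

#include <cassert>

void packed_sfen_roundtrip(Position& pos, Thread* th) {
    Learner::PackedSfen packed;
    pos.sfen_pack(packed);                                // 32-byte encoding, gamePly is not included

    Position restored;
    StateInfo si;
    if (restored.set_from_packed_sfen(packed, &si, th) != 0)
        return;                                           // non-zero means the packed data was inconsistent

    assert(restored.side_to_move() == pos.side_to_move());
    assert(restored.king_square(WHITE) == pos.king_square(WHITE));
}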

View File

@ -30,6 +30,9 @@
#include "nnue/nnue_accumulator.h"
#include "learn/packed_sfen.h"
#include "learn/sfen_packer.h"
/// StateInfo struct stores information needed to restore a Position object to
/// its previous state when we retract a move. Whenever a move is made on the
@ -75,9 +78,6 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
/// traversing the search tree.
class Thread;
// packed sfen
struct PackedSfen { uint8_t data[32]; };
class Position {
public:
static void init();
@ -175,26 +175,27 @@ public:
// Used by NNUE
StateInfo* state() const;
#if defined(EVAL_LEARN)
// --sfenization helper
friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
// Get the packed sfen. The result is written to the buffer given as the argument.
// gamePly is not included in the pack.
void sfen_pack(PackedSfen& sfen);
void sfen_pack(Learner::PackedSfen& sfen);
// Going through an sfen string is slow, so this sets the position directly from a packed sfen.
// Equivalent to pos.set(sfen_unpack(data),si,th);.
// If there is a problem with the passed position data and an error occurs, non-zero is returned.
// PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
int set_from_packed_sfen(const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
void clear() { std::memset(this, 0, sizeof(Position)); }
// Given the board, pieces in hand, and side to move, return the sfen.
//static std::string sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly);
// Returns the square of the king of color c.
Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; }
#endif // EVAL_LEARN
bool RootInTB;
private:
// Initialization helpers (used while setting up a position)

View File

@ -40,19 +40,12 @@ namespace Search {
LimitsType Limits;
}
namespace Tablebases {
int Cardinality;
bool UseRule50;
Depth ProbeDepth;
}
namespace TB = Tablebases;
using std::string;
using Eval::evaluate;
using namespace Search;
bool Search::prune_at_shallow_depth = true;
namespace {
// Different node types, used as a template parameter
@ -714,27 +707,27 @@ namespace {
}
// Step 5. Tablebases probe
if (!rootNode && TB::Cardinality)
if (!rootNode && thisThread->Cardinality)
{
int piecesCount = pos.count<ALL_PIECES>();
if ( piecesCount <= TB::Cardinality
&& (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth)
if ( piecesCount <= thisThread->Cardinality
&& (piecesCount < thisThread->Cardinality || depth >= thisThread->ProbeDepth)
&& pos.rule50_count() == 0
&& !pos.can_castle(ANY_CASTLING))
{
TB::ProbeState err;
TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
Tablebases::ProbeState err;
Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
// Force check of time on the next occasion
if (thisThread == Threads.main())
static_cast<MainThread*>(thisThread)->callsCnt = 0;
if (err != TB::ProbeState::FAIL)
if (err != Tablebases::ProbeState::FAIL)
{
thisThread->tbHits.fetch_add(1, std::memory_order_relaxed);
int drawScore = TB::UseRule50 ? 1 : 0;
int drawScore = thisThread->UseRule50 ? 1 : 0;
// use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score
value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1
@ -995,9 +988,7 @@ moves_loop: // When in check, search starts from here
ss->moveCount = ++moveCount;
if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000
#if defined(EVAL_LEARN)
&& !Limits.silent
#endif
)
sync_cout << "info depth " << depth
<< " currmove " << UCI::move(move, pos.is_chess960())
@ -1015,6 +1006,7 @@ moves_loop: // When in check, search starts from here
// Step 13. Pruning at shallow depth (~200 Elo)
if ( !rootNode
&& (PvNode ? prune_at_shallow_depth : true)
&& pos.non_pawn_material(us)
&& bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
{
@ -1526,6 +1518,7 @@ moves_loop: // When in check, search starts from here
// Futility pruning
if ( !ss->inCheck
&& Search::prune_at_shallow_depth
&& !givesCheck
&& futilityBase > -VALUE_KNOWN_WIN
&& !pos.advanced_pawn_push(move))
@ -1553,6 +1546,7 @@ moves_loop: // When in check, search starts from here
// Do not search moves with negative SEE values
if ( !ss->inCheck
&& Search::prune_at_shallow_depth
&& !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
&& !pos.see_ge(move))
continue;
@ -1574,6 +1568,7 @@ moves_loop: // When in check, search starts from here
[to_sq(move)];
if ( !captureOrPromotion
&& Search::prune_at_shallow_depth
&& moveCount
&& (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
&& (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
@ -1844,7 +1839,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
size_t pvIdx = pos.this_thread()->pvIdx;
size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size());
uint64_t nodesSearched = Threads.nodes_searched();
uint64_t tbHits = Threads.tb_hits() + (pos.RootInTB ? rootMoves.size() : 0);
uint64_t tbHits = Threads.tb_hits() + (pos.this_thread()->rootInTB ? rootMoves.size() : 0);
for (size_t i = 0; i < multiPV; ++i)
{
@ -1859,7 +1854,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
if (v == -VALUE_INFINITE)
v = VALUE_ZERO;
bool tb = pos.RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
bool tb = pos.this_thread()->rootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
v = tb ? rootMoves[i].tbScore : v;
if (ss.rdbuf()->in_avail()) // Not at first line
@ -1926,34 +1921,34 @@ bool RootMove::extract_ponder_from_tt(Position& pos) {
void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
pos.RootInTB = false;
UseRule50 = bool(Options["Syzygy50MoveRule"]);
ProbeDepth = int(Options["SyzygyProbeDepth"]);
Cardinality = int(Options["SyzygyProbeLimit"]);
auto& rootInTB = pos.this_thread()->rootInTB;
auto& cardinality = pos.this_thread()->Cardinality;
auto& probeDepth = pos.this_thread()->ProbeDepth;
rootInTB = false;
bool dtz_available = true;
// Tables with fewer pieces than SyzygyProbeLimit are searched with
// ProbeDepth == DEPTH_ZERO
if (Cardinality > MaxCardinality)
if (cardinality > Tablebases::MaxCardinality)
{
Cardinality = MaxCardinality;
ProbeDepth = 0;
cardinality = Tablebases::MaxCardinality;
probeDepth = 0;
}
if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
if (cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
{
// Rank moves using DTZ tables
pos.RootInTB = root_probe(pos, rootMoves);
rootInTB = root_probe(pos, rootMoves);
if (!pos.RootInTB)
if (!rootInTB)
{
// DTZ tables are missing; try to rank moves using WDL tables
dtz_available = false;
pos.RootInTB = root_probe_wdl(pos, rootMoves);
rootInTB = root_probe_wdl(pos, rootMoves);
}
}
if (pos.RootInTB)
if (rootInTB)
{
// Sort moves according to TB rank
std::stable_sort(rootMoves.begin(), rootMoves.end(),
@ -1961,7 +1956,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
// Probe during search only if DTZ is not available and we are winning
if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW)
Cardinality = 0;
cardinality = 0;
}
else
{
@ -1969,11 +1964,11 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
for (auto& m : rootMoves)
m.tbRank = 0;
}
}
// --- Expose functions such as the fixed-depth search used for learning to the outside
#if defined (EVAL_LEARN)
namespace Learner
{
@ -1991,39 +1986,6 @@ namespace Learner
std::memset(ss - 7, 0, 10 * sizeof(Stack));
// About Search::Limits
// Be careful because this member variable is global and affects other threads.
{
auto& limits = Search::Limits;
// Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
limits.infinite = true;
// Since PV is an obstacle when displayed, erase it.
limits.silent = true;
// If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
limits.nodes = 0;
// depth is also processed by the one passed as an argument of Learner::search().
limits.depth = 0;
// Set a large value to prevent the draw value from being returned due to the number of moves near the draw.
//limits.max_game_ply = 1 << 16;
// Without the entering-king rule, games end in draws and are hard to settle.
//limits.enteringKingRule = EnteringKingRule::EKR_27_POINT;
}
// Set DrawValue
{
// Because it is not prepared for each thread
// May be overwritten by another thread. There is no help for it.
// If that happens, I think it should be 0.
//drawValueTable[REPETITION_DRAW][BLACK] = VALUE_ZERO;
//drawValueTable[REPETITION_DRAW][WHITE] = VALUE_ZERO;
}
// Regarding this_thread.
{
@ -2035,7 +1997,7 @@ namespace Learner
th->nmpMinPly = th->bestMoveChanges = 0;
th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
// Zero initialization of the number of search nodes
// Zero initialization of the number of search nodes
th->nodes = 0;
// Clear all history types. This initialization takes a little time, and since the search accuracy is rather low anyway, it is not clear whether it helps or hurts.
@ -2059,7 +2021,7 @@ namespace Learner
for (int i = 7; i > 0; i--)
(ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
// set rootMoves
// set rootMoves
auto& rootMoves = th->rootMoves;
rootMoves.clear();
@ -2067,7 +2029,20 @@ namespace Learner
rootMoves.push_back(Search::RootMove(m));
assert(!rootMoves.empty());
TB::rank_root_moves(pos, rootMoves);
th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
th->Cardinality = int(Options["SyzygyProbeLimit"]);
// Tables with fewer pieces than SyzygyProbeLimit are searched with
// ProbeDepth == DEPTH_ZERO
if (th->Cardinality > Tablebases::MaxCardinality)
{
th->Cardinality = Tablebases::MaxCardinality;
th->ProbeDepth = 0;
}
Tablebases::rank_root_moves(pos, rootMoves);
}
}
@ -2088,8 +2063,8 @@ namespace Learner
// Because it had a bad effect, specifying the window range is no longer allowed.
ValueAndPV qsearch(Position& pos)
{
Stack stack[MAX_PLY + 10], * ss = stack + 7;
Move pv[MAX_PLY + 1];
Stack stack[MAX_PLY+10], *ss = stack+7;
Move pv[MAX_PLY+1];
init_for_search(pos, ss);
ss->pv = pv; // A buffer is needed somewhere for the PV, so point it at a dummy one for now.
@ -2108,7 +2083,7 @@ namespace Learner
auto bestValue = ::qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0);
// Returns the PV obtained.
// Returns the PV obtained.
std::vector<Move> pvs;
for (Move* p = &ss->pv[0]; is_ok(*p); ++p)
pvs.push_back(*p);
@ -2174,7 +2149,7 @@ namespace Learner
Value bestValue = -VALUE_INFINITE;
while ((rootDepth += 1) <= depth
// exit this loop even if the node limit is exceeded
// exit this loop even if the node limit is exceeded
// The number of search nodes is passed in the argument of this function.
&& !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit)
)
@ -2196,46 +2171,36 @@ namespace Learner
break;
}
// selDepth output with USI info for each depth and PV line
// selDepth output with USI info for each depth and PV line
selDepth = 0;
// Switch to aspiration search for depth 5 and above.
if (rootDepth >= 5 * 1)
if (rootDepth >= 4)
{
delta = Value(20);
Value p = rootMoves[pvIdx].previousScore;
alpha = std::max(p - delta, -VALUE_INFINITE);
beta = std::min(p + delta, VALUE_INFINITE);
Value prev = rootMoves[pvIdx].previousScore;
delta = Value(17);
alpha = std::max(prev - delta,-VALUE_INFINITE);
beta = std::min(prev + delta, VALUE_INFINITE);
}
// aspiration search
int failedHighCnt = 0;
while (true)
{
Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1);
Depth adjustedDepth = std::max(1, rootDepth);
bestValue = ::search<PV>(pos, ss, alpha, beta, adjustedDepth, false);
stable_sort(rootMoves.begin() + pvIdx, rootMoves.end());
//my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx);
// Expand aspiration window for fail low/high.
// Expand aspiration window for fail low/high.
// However, if it is the value specified by the argument, it will be treated as fail low/high and break.
if (bestValue <= alpha)
{
beta = (alpha + beta) / 2;
alpha = std::max(bestValue - delta, -VALUE_INFINITE);
failedHighCnt = 0;
//if (mainThread)
// mainThread->stopOnPonderhit = false;
}
else if (bestValue >= beta)
{
beta = std::min(bestValue + delta, VALUE_INFINITE);
++failedHighCnt;
}
else
break;
@ -2256,7 +2221,6 @@ namespace Learner
}
// Filter the PV through is_ok(), in case a NULL_MOVE slipped into the middle of it.
// Note: a proper PV should not contain NULL_MOVE in the first place.
// MOVE_WIN has never been inserted here. (For now)
for (Move move : rootMoves[0].pv)
{
@ -2274,4 +2238,3 @@ namespace Learner
}
}
#endif

View File

@ -24,6 +24,7 @@
#include "misc.h"
#include "movepick.h"
#include "types.h"
#include "uci.h"
class Position;
@ -32,6 +33,7 @@ namespace Search {
/// Threshold used for countermoves based pruning
constexpr int CounterMovePruneThreshold = 0;
extern bool prune_at_shallow_depth;
/// Stack struct keeps track of the information we need to remember from nodes
/// shallower and deeper in the tree during the search. Each search thread has
@ -88,9 +90,7 @@ struct LimitsType {
time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
movestogo = depth = mate = perft = infinite = 0;
nodes = 0;
#if defined (EVAL_LEARN)
silent = false;
#endif
}
bool use_time_management() const {
@ -101,11 +101,9 @@ struct LimitsType {
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
int movestogo, depth, mate, perft, infinite;
int64_t nodes;
#if defined (EVAL_LEARN)
// Silent mode that does not output to the screen (for continuous self-play in process)
// Do not output PV at this time.
bool silent;
#endif
};
extern LimitsType Limits;
@ -115,4 +113,18 @@ void clear();
} // namespace Search
namespace Tablebases {
extern int MaxCardinality;
}
namespace Learner {
// A pair of evaluation value and PV (principal variation). Returned by Learner::search() and Learner::qsearch().
using ValueAndPV = std::pair<Value, std::vector<Move>>;
ValueAndPV qsearch(Position& pos);
ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0);
}
#endif // #ifndef SEARCH_H_INCLUDED
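A rough usage sketch of the Learner entry points declared above, assuming the engine's usual startup (UCI::init, Bitboards/Position/Threads initialization) has already run; the FEN and depth below are arbitrary:

// Assumes the engine's own headers (position.h, thread.h, search.h) are included.
Position pos;
StateInfo st;
pos.set("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", false, &st, Threads.main());

// Fixed-depth search from this position: returns the evaluation and the PV.
auto [value, pv] = Learner::search(pos, /*depth*/ 8, /*multiPV*/ 1, /*nodesLimit*/ 0);

// Quiescence-only evaluation of the same position.
auto [qvalue, qpv] = Learner::qsearch(pos);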

View File

@ -43,8 +43,6 @@ enum ProbeState {
ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move)
};
extern int MaxCardinality;
void init(const std::string& paths);
WDLScore probe_wdl(Position& pos, ProbeState* result);
int probe_dtz(Position& pos, ProbeState* result);

View File

@ -181,9 +181,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
rootMoves.emplace_back(m);
if (!rootMoves.empty())
Tablebases::rank_root_moves(pos, rootMoves);
// After ownership transfer 'states' becomes empty, so if we stop the search
// and call 'go' again without setting a new position states.get() == NULL.
assert(states.get() || setupStates.get());
@ -203,6 +200,21 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
th->rootMoves = rootMoves;
th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
th->rootState = setupStates->back();
th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
th->Cardinality = int(Options["SyzygyProbeLimit"]);
// Tables with fewer pieces than SyzygyProbeLimit are searched with
// ProbeDepth == DEPTH_ZERO
if (th->Cardinality > Tablebases::MaxCardinality)
{
th->Cardinality = Tablebases::MaxCardinality;
th->ProbeDepth = 0;
}
if (!rootMoves.empty())
Tablebases::rank_root_moves(pos, rootMoves);
}
main()->start_searching();

View File

@ -73,6 +73,11 @@ public:
CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2];
Score contempt;
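// Per-thread Syzygy tablebase probing settings, filled in by
// ThreadPool::start_thinking() and Learner::init_for_search().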
bool rootInTB;
int Cardinality;
bool UseRule50;
Depth ProbeDepth;
};

View File

@ -28,13 +28,16 @@
TranspositionTable TT; // Our global transposition table
bool TranspositionTable::enable_transposition_table = true;
/// TTEntry::save() populates the TTEntry with a new node's data, possibly
/// overwriting an old position. Update is not atomic and can be racy.
void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
if (Options["Training"])
return;
if (!TranspositionTable::enable_transposition_table) {
return;
}
// Preserve any existing move for the same position
if (m || (uint16_t)k != key16)
move16 = (uint16_t)m;
@ -117,8 +120,11 @@ void TranspositionTable::clear() {
/// TTEntry t2 if its replace value is greater than that of t2.
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
if (Options["Training"])
return found = false, first_entry(0);
if (!enable_transposition_table) {
found = false;
return first_entry(0);
}
TTEntry* const tte = first_entry(key);
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster

View File

@ -84,6 +84,8 @@ public:
return &table[mul_hi64(key, clusterCount)].entry[0];
}
static bool enable_transposition_table;
private:
friend struct TTEntry;

View File

@ -33,6 +33,10 @@
#include "tt.h"
#include "uci.h"
#include "learn/gensfen.h"
#include "learn/learn.h"
#include "learn/convert.h"
using namespace std;
extern vector<string> setup_bench(const Position&, istream&);
@ -40,25 +44,6 @@ extern vector<string> setup_bench(const Position&, istream&);
// FEN string of the initial position, normal chess
const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
// Command to automatically generate a game record
#if defined (EVAL_LEARN)
namespace Learner
{
// Automatic generation of teacher position
void gen_sfen(Position& pos, istringstream& is);
// Learning from the generated game record
void learn(Position& pos, istringstream& is);
// A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
ValueAndPV qsearch(Position& pos);
ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0);
}
#endif
void test_cmd(Position& pos, istringstream& is)
{
// Initialize, since a search may be performed.
@ -70,7 +55,7 @@ void test_cmd(Position& pos, istringstream& is)
if (param == "nnue") Eval::NNUE::TestCommand(pos, is);
}
namespace UCI {
namespace {
// position() is called when engine receives the "position" UCI command.
// The function sets up the position described in the given FEN string ("fen")
@ -225,42 +210,41 @@ namespace UCI {
<< "\nNodes/second : " << 1000 * nodes / elapsed << endl;
}
// The win rate model returns the probability (per mille) of winning given an eval
// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
int win_rate_model(Value v, int ply) {
// Return win rate in per mille (rounded to nearest)
return int(0.5 + win_rate_model_double(v, ply));
}
// The win rate model returns the probability (per mille) of winning given an eval
// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
double win_rate_model_double(double v, int ply) {
// The model captures only up to 240 plies, so limit input (and rescale)
double m = std::min(240, ply) / 64.0;
// Coefficients of a 3rd order polynomial fit based on fishtest data
// for two parameters needed to transform eval to the argument of a
// logistic function.
double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751};
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
// Transform eval to centipawns with limited range
double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
// Return win rate in per mille
return 1000.0 / (1 + std::exp((a - x) / b));
}
} // namespace
// The win rate model returns the probability (per mille) of winning given an eval
// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
int UCI::win_rate_model(Value v, int ply) {
// Return win rate in per mille (rounded to nearest)
return int(0.5 + win_rate_model_double(v, ply));
}
// The win rate model returns the probability (per mille) of winning given an eval
// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
double UCI::win_rate_model_double(double v, int ply) {
// The model captures only up to 240 plies, so limit input (and rescale)
double m = std::min(240, ply) / 64.0;
// Coefficients of a 3rd order polynomial fit based on fishtest data
// for two parameters needed to transform eval to the argument of a
// logistic function.
double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751};
double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
// Transform eval to centipawns with limited range
double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
// Return win rate in per mille
return 1000.0 / (1 + std::exp((a - x) / b));
}
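A short aside on the model above: with m = min(240, ply) / 64, the cubic polynomials give the logistic parameters a(m) and b(m); when the clamped centipawn value x equals a(m), the exponent is zero and the function returns exactly 500 per mille, so a(m) is the eval at which the model predicts an even chance and b(m) sets how quickly the probability saturates around it. A caller can probe the model directly; this sketch assumes the engine's uci.h and types.h are available, and the eval and plies chosen are arbitrary:

#include <iostream>

void print_win_rates()
{
    // Same internal eval at three game phases; output is in per mille (0..1000).
    for (int ply : {16, 64, 160})
        std::cout << "ply " << ply << ": "
                  << UCI::win_rate_model(Value(150), ply) << " per mille\n";
}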
// --------------------
// Call qsearch(),search() directly for testing
// --------------------
#if defined(EVAL_LEARN)
void qsearch_cmd(Position& pos)
{
cout << "qsearch : ";
@ -292,8 +276,6 @@ void search_cmd(Position& pos, istringstream& is)
cout << endl;
}
#endif
/// UCI::loop() waits for a command from stdin, parses it and calls the appropriate
/// function. Also intercepts EOF from stdin to ensure gracefully exiting if the
/// GUI dies unexpectedly. When called with some command line arguments, e.g. to
@ -349,16 +331,15 @@ void UCI::loop(int argc, char* argv[]) {
else if (token == "d") sync_cout << pos << sync_endl;
else if (token == "eval") trace_eval(pos);
else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
#if defined (EVAL_LEARN)
else if (token == "gensfen") Learner::gen_sfen(pos, is);
else if (token == "learn") Learner::learn(pos, is);
else if (token == "convert") Learner::convert(is);
// Command to call qsearch(),search() directly for testing
else if (token == "qsearch") qsearch_cmd(pos);
else if (token == "search") search_cmd(pos, is);
#endif
// test command
else if (token == "test") test_cmd(pos, is);
else

View File

@ -72,6 +72,7 @@ std::string square(Square s);
std::string move(Move m, bool chess960);
std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
std::string wdl(Value v, int ply);
int win_rate_model(Value v, int ply);
double win_rate_model_double(double v, int ply);
Move to_move(const Position& pos, std::string& str);

View File

@ -43,6 +43,12 @@ void on_threads(const Option& o) { Threads.set(size_t(o)); }
void on_tb_path(const Option& o) { Tablebases::init(o); }
void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
void on_eval_file(const Option& ) { Eval::init_NNUE(); }
void on_prune_at_shallow_depth(const Option& o) {
Search::prune_at_shallow_depth = o;
}
void on_enable_transposition_table(const Option& o) {
TranspositionTable::enable_transposition_table = o;
}
/// Our case insensitive less() function as required by UCI protocol
bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@ -70,7 +76,6 @@ void init(OptionsMap& o) {
o["Move Overhead"] << Option(10, 0, 5000);
o["Slow Mover"] << Option(100, 10, 1000);
o["nodestime"] << Option(0, 0, 10000);
o["Training"] << Option(false);
o["UCI_Chess960"] << Option(false);
o["UCI_AnalyseMode"] << Option(false);
o["UCI_LimitStrength"] << Option(false);
@ -80,7 +85,7 @@ void init(OptionsMap& o) {
o["SyzygyProbeDepth"] << Option(1, 1, 100);
o["Syzygy50MoveRule"] << Option(true);
o["SyzygyProbeLimit"] << Option(7, 0, 7);
o["Use NNUE"] << Option(true, on_use_NNUE);
o["Use NNUE"] << Option("true var true var false var pure", "true", on_use_NNUE);
o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file);
// When the evaluation function is loaded at ucinewgame time, the new evaluation function has to be converted first.
// We want to run the "test eval convert" command, but at that point no new evaluation function exists yet.
@ -88,12 +93,14 @@ void init(OptionsMap& o) {
// Therefore, this hidden option lets you suppress loading of the evaluation function on ucinewgame,
// so that the "test eval convert" command can be run.
o["SkipLoadingEval"] << Option(false);
#if defined(EVAL_LEARN)
// When learning the evaluation function, you can change the folder in which evaluation functions are saved.
// The default is "evalsave". This folder must be prepared in advance.
// Subfolders such as "0/", "1/", ... are created automatically under it, and the evaluation function files are saved there.
o["EvalSaveDir"] << Option("evalsave");
#endif
// Prune at shallow depth on PV nodes. False is recommended when using fixed depth search.
o["PruneAtShallowDepth"] << Option(true, on_prune_at_shallow_depth);
// Enable transposition table.
o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table);
}
@ -147,7 +154,7 @@ Option::operator double() const {
}
Option::operator std::string() const {
assert(type == "string");
assert(type == "check" || type == "spin" || type == "combo" || type == "button" || type == "string");
return currentValue;
}

View File

@ -64,8 +64,8 @@ EOF
;;
esac
mkdir -p training_data_01
mkdir -p training_data_02
mkdir -p training_data
mkdir -p validation_data
# gensfen testing 01
cat << EOF > gensfen01.exp
@ -78,7 +78,11 @@ cat << EOF > gensfen01.exp
send "setoption name Threads value $threads\n"
send "setoption name Use NNUE value false\n"
send "isready\n"
send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data_01/training_data.bin use_raw_nnue_eval 0\n"
send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
expect "gensfen finished."
send "learn training_data/training_data.bin convert_plain output_file_name training_data.txt\n"
expect "all done"
send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
expect "gensfen finished."
send "quit\n"
@ -100,7 +104,9 @@ cat << EOF > gensfen02.exp
send "setoption name Threads value $threads\n"
send "setoption name Use NNUE value true\n"
send "isready\n"
send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data_02/training_data.bin use_raw_nnue_eval 0\n"
send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
expect "gensfen finished."
send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
expect "gensfen finished."
send "quit\n"
@ -111,7 +117,30 @@ cat << EOF > gensfen02.exp
exit \$value
EOF
for exp in gensfen01.exp gensfen02.exp
# simple learning
cat << EOF > learn01.exp
set timeout 240
spawn $exeprefix ./stockfish
send "uci\n"
send "setoption name SkipLoadingEval value true\n"
send "setoption name Use NNUE value true\n"
send "setoption name Threads value $threads\n"
send "isready\n"
send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 mirror_percentage 50 validation_set_file_name validation_data/validation_data.bin\n"
expect "save_eval() finished."
send "quit\n"
expect eof
# return error code of the spawned program, useful for valgrind
lassign [wait] pid spawnid os_error_flag value
exit \$value
EOF
for exp in gensfen01.exp gensfen02.exp learn01.exp
do
echo "$prefix expect $exp $postfix"