From b5a4d350e5b5d15584986e7ae0729af73c9ac11d Mon Sep 17 00:00:00 2001
From: Thibault Duplessis <t@lichess.org>
Date: Tue, 26 Jan 2021 11:35:33 +0100
Subject: [PATCH] improve storm selector consistency

by filtering puzzles before sampling
---
 modules/storm/src/main/StormHigh.scala     |   1 -
 modules/storm/src/main/StormSelector.scala | 102 ++++++++++-----------
 2 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/modules/storm/src/main/StormHigh.scala b/modules/storm/src/main/StormHigh.scala
index 155be18b4d..e06ab28728 100644
--- a/modules/storm/src/main/StormHigh.scala
+++ b/modules/storm/src/main/StormHigh.scala
@@ -78,7 +78,6 @@ final class StormHighApi(coll: Coll, cacheApi: CacheApi)(implicit ctx: Execution
         ) -> Nil
       }
       .map2 { doc =>
-        println(lila.db.BSON.debug(doc))
         def readScore(doc: Bdoc, field: String) =
           ~doc.getAsOpt[List[Bdoc]](field).flatMap(_.headOption).flatMap(_.getAsOpt[Int]("score"))
         StormHigh(
diff --git a/modules/storm/src/main/StormSelector.scala b/modules/storm/src/main/StormSelector.scala
index 7a4f26c04c..e463ab26c1 100644
--- a/modules/storm/src/main/StormSelector.scala
+++ b/modules/storm/src/main/StormSelector.scala
@@ -19,12 +19,14 @@ final class StormSelector(colls: PuzzleColls, cacheApi: CacheApi)(implicit ec: E
 
   def apply: Fu[List[StormPuzzle]] = current.get {}
 
-  private val poolSize = 130
-  private val theme    = lila.puzzle.PuzzleTheme.mix.key.value
-  private val tier     = lila.puzzle.PuzzleTier.Good.key
+  private val poolSize     = 130
+  private val theme        = lila.puzzle.PuzzleTheme.mix.key.value
+  private val tier         = lila.puzzle.PuzzleTier.Good.key
+  private val maxDeviation = 90
 
-  private val ratings       = (1000 to 2800 by 150).toList
-  private val ratingBuckets = ratings.size
+  private val ratings          = (1000 to 2800 by 150).toList
+  private val ratingBuckets    = ratings.size
+  private val puzzlesPerBucket = poolSize / ratingBuckets
 
   private val current = cacheApi.unit[List[StormPuzzle]] {
     _.refreshAfterWrite(6 seconds)
@@ -39,64 +41,62 @@ final class StormSelector(colls: PuzzleColls, cacheApi: CacheApi)(implicit ec: E
                     Match(
                       $doc(
                         "min" $lte f"${theme}_${tier}_${rating}%04d",
-                        "max" $gt f"${theme}_${tier}_${rating}%04d"
+                        "max" $gte f"${theme}_${tier}_${rating}%04d"
                       )
                     ),
                     Project($doc("_id" -> false, "ids" -> true)),
                     Sample(1),
                     UnwindField("ids"),
-                    Sample((poolSize * 5) / ratingBuckets),
-                    Group(BSONNull)("ids" -> PushField("ids"))
+                    Sample(puzzlesPerBucket * 6), // ensure we have enough after filtering deviation & color
+                    PipelineOperator(
+                      $doc(
+                        "$lookup" -> $doc(
+                          "from" -> colls.puzzle.name.value,
+                          "as"   -> "puzzle",
+                          "let"  -> $doc("id" -> "$ids"),
+                          "pipeline" -> $arr(
+                            $doc(
+                              "$match" -> $doc(
+                                "$expr" -> $doc(
+                                  "$and" -> $arr(
+                                    $doc("$eq"  -> $arr("$_id", "$$id")),
+                                    $doc("$lte" -> $arr("$glicko.d", maxDeviation)),
+                                    $doc(
+                                      "$regexMatch" -> $doc(
+                                        "input" -> "$fen",
+                                        "regex" -> {
+                                          if (scala.util.Random.nextBoolean()) " w " else " b "
+                                        }
+                                      )
+                                    )
+                                  )
+                                )
+                              )
+                            ),
+                            $doc(
+                              "$project" -> $doc(
+                                "fen"    -> true,
+                                "line"   -> true,
+                                "rating" -> $doc("$toInt" -> "$glicko.r")
+                              )
+                            )
+                          )
+                        )
+                      )
+                    ),
+                    UnwindField("puzzle"),
+                    Sample(puzzlesPerBucket),
+                    ReplaceRootField("puzzle")
                   )
                 }
               ) -> List(
                 Project($doc("all" -> $doc("$setUnion" -> ratings.map(r => s"$$$r")))),
                 UnwindField("all"),
-                UnwindField("all.ids"),
-                Project($doc("id" -> "$all.ids")),
-                PipelineOperator(
-                  $doc(
-                    "$lookup" -> $doc(
-                      "from" -> colls.puzzle.name.value,
-                      "as"   -> "puzzle",
-                      "let"  -> $doc("id" -> "$id"),
-                      "pipeline" -> $arr(
-                        $doc(
-                          "$match" -> $doc(
-                            "$expr" -> $doc(
-                              "$and" -> $arr(
-                                $doc("$eq" -> $arr("$_id", "$$id")),
-                                $doc("$lt" -> $arr("$glicko.d", 90)),
-                                $doc(
-                                  "$regexMatch" -> $doc(
-                                    "input" -> "$fen",
-                                    "regex" -> {
-                                      if (scala.util.Random.nextBoolean()) " w " else " b "
-                                    }
-                                  )
-                                )
-                              )
-                            )
-                          )
-                        ),
-                        $doc(
-                          "$project" -> $doc(
-                            "fen"    -> true,
-                            "line"   -> true,
-                            "rating" -> $doc("$toInt" -> "$glicko.r")
-                          )
-                        )
-                      )
-                    )
-                  )
-                ),
-                UnwindField("puzzle"),
-                ReplaceRootField("puzzle"),
-                Sample(poolSize),
+                ReplaceRootField("all"),
                 Sort(Ascending("rating"))
               )
-            }.map { docs =>
-              docs.flatMap(StormPuzzleBSONReader.readOpt)
+            }.map {
+              _.flatMap(StormPuzzleBSONReader.readOpt)
             }
           }
           .mon(_.storm.selector.time)