improve storm selector consistency

by filtering puzzles on rating deviation and side to move within each rating bucket before sampling
2021-01-26 11:35:33 +01:00 · 2021-01-26 11:35:33 +01:00 · b5a4d350e5
parent 1e272929b5
commit b5a4d350e5
2 changed files with 51 additions and 52 deletions
--- a/modules/storm/src/main/StormHigh.scala
+++ b/modules/storm/src/main/StormHigh.scala
@ -78,7 +78,6 @@ final class StormHighApi(coll: Coll, cacheApi: CacheApi)(implicit ctx: Execution
        ) -> Nil
      }
      .map2 { doc =>
-        println(lila.db.BSON.debug(doc))
        def readScore(doc: Bdoc, field: String) =
          ~doc.getAsOpt[List[Bdoc]](field).flatMap(_.headOption).flatMap(_.getAsOpt[Int]("score"))
        StormHigh(
--- a/modules/storm/src/main/StormSelector.scala
+++ b/modules/storm/src/main/StormSelector.scala
@ -19,12 +19,14 @@ final class StormSelector(colls: PuzzleColls, cacheApi: CacheApi)(implicit ec: E

  def apply: Fu[List[StormPuzzle]] = current.get {}

-  private val poolSize = 130
-  private val theme    = lila.puzzle.PuzzleTheme.mix.key.value
-  private val tier     = lila.puzzle.PuzzleTier.Good.key
+  private val poolSize     = 130
+  private val theme        = lila.puzzle.PuzzleTheme.mix.key.value
+  private val tier         = lila.puzzle.PuzzleTier.Good.key
+  private val maxDeviation = 90

-  private val ratings       = (1000 to 2800 by 150).toList
-  private val ratingBuckets = ratings.size
+  private val ratings          = (1000 to 2800 by 150).toList
+  private val ratingBuckets    = ratings.size
+  private val puzzlesPerBucket = poolSize / ratingBuckets

  private val current = cacheApi.unit[List[StormPuzzle]] {
    _.refreshAfterWrite(6 seconds)
@ -39,64 +41,62 @@ final class StormSelector(colls: PuzzleColls, cacheApi: CacheApi)(implicit ec: E
                    Match(
                      $doc(
                        "min" $lte f"${theme}_${tier}_${rating}%04d",
-                        "max" $gt f"${theme}_${tier}_${rating}%04d"
+                        "max" $gte f"${theme}_${tier}_${rating}%04d"
                      )
                    ),
                    Project($doc("_id" -> false, "ids" -> true)),
                    Sample(1),
                    UnwindField("ids"),
-                    Sample((poolSize * 5) / ratingBuckets),
-                    Group(BSONNull)("ids" -> PushField("ids"))
+                    Sample(puzzlesPerBucket * 6), // ensure we have enough after filtering deviation & color
+                    PipelineOperator(
+                      $doc(
+                        "$lookup" -> $doc(
+                          "from" -> colls.puzzle.name.value,
+                          "as"   -> "puzzle",
+                          "let"  -> $doc("id" -> "$ids"),
+                          "pipeline" -> $arr(
+                            $doc(
+                              "$match" -> $doc(
+                                "$expr" -> $doc(
+                                  "$and" -> $arr(
+                                    $doc("$eq"  -> $arr("$_id", "$$id")),
+                                    $doc("$lte" -> $arr("$glicko.d", maxDeviation)),
+                                    $doc(
+                                      "$regexMatch" -> $doc(
+                                        "input" -> "$fen",
+                                        "regex" -> {
+                                          if (scala.util.Random.nextBoolean()) " w " else " b "
+                                        }
+                                      )
+                                    )
+                                  )
+                                )
+                              )
+                            ),
+                            $doc(
+                              "$project" -> $doc(
+                                "fen"    -> true,
+                                "line"   -> true,
+                                "rating" -> $doc("$toInt" -> "$glicko.r")
+                              )
+                            )
+                          )
+                        )
+                      )
+                    ),
+                    UnwindField("puzzle"),
+                    Sample(puzzlesPerBucket),
+                    ReplaceRootField("puzzle")
                  )
                }
              ) -> List(
                Project($doc("all" -> $doc("$setUnion" -> ratings.map(r => s"$$$r")))),
                UnwindField("all"),
-                UnwindField("all.ids"),
-                Project($doc("id" -> "$all.ids")),
-                PipelineOperator(
-                  $doc(
-                    "$lookup" -> $doc(
-                      "from" -> colls.puzzle.name.value,
-                      "as"   -> "puzzle",
-                      "let"  -> $doc("id" -> "$id"),
-                      "pipeline" -> $arr(
-                        $doc(
-                          "$match" -> $doc(
-                            "$expr" -> $doc(
-                              "$and" -> $arr(
-                                $doc("$eq" -> $arr("$_id", "$$id")),
-                                $doc("$lt" -> $arr("$glicko.d", 90)),
-                                $doc(
-                                  "$regexMatch" -> $doc(
-                                    "input" -> "$fen",
-                                    "regex" -> {
-                                      if (scala.util.Random.nextBoolean()) " w " else " b "
-                                    }
-                                  )
-                                )
-                              )
-                            )
-                          )
-                        ),
-                        $doc(
-                          "$project" -> $doc(
-                            "fen"    -> true,
-                            "line"   -> true,
-                            "rating" -> $doc("$toInt" -> "$glicko.r")
-                          )
-                        )
-                      )
-                    )
-                  )
-                ),
-                UnwindField("puzzle"),
-                ReplaceRootField("puzzle"),
-                Sample(poolSize),
+                ReplaceRootField("all"),
                Sort(Ascending("rating"))
              )
-            }.map { docs =>
-              docs.flatMap(StormPuzzleBSONReader.readOpt)
+            }.map {
+              _.flatMap(StormPuzzleBSONReader.readOpt)
            }
          }
          .mon(_.storm.selector.time)