lila/modules/insight/src/main/AggregationPipeline.scala

329 lines
12 KiB
Scala
Raw Normal View History

2015-11-26 21:05:59 -07:00
package lila.insight
2019-11-29 19:16:11 -07:00
import reactivemongo.api.bson._
2016-04-02 00:11:09 -06:00
import lila.db.dsl._
2019-12-03 12:15:15 -07:00
import lila.user.User
final private class AggregationPipeline(store: Storage)(implicit ec: scala.concurrent.ExecutionContext) {
2019-12-03 12:15:15 -07:00
2020-05-05 22:11:15 -06:00
def aggregate[X](question: Question[X], user: User): Fu[List[Bdoc]] =
store.coll {
_.aggregateList(
maxDocs = Int.MaxValue,
allowDiskUse = true
) { implicit framework =>
import framework._
import question.{ dimension, filters, metric }
2020-05-05 22:11:15 -06:00
import lila.insight.{ Dimension => D, Metric => M }
2021-01-22 01:18:06 -07:00
import InsightEntry.{ BSONFields => F }
2020-05-05 22:11:15 -06:00
import Storage._
2019-12-03 12:15:15 -07:00
2020-05-27 12:41:50 -06:00
// Sampling stages that cap how much data the pipeline processes:
// `sampleGames` is prepended to every stage list built below, while
// `sampleMoves` is placed after the per-move match in move-level metrics.
// NOTE(review): `Sample` is presumably the $sample aggregation stage — confirm.
val sampleGames = Sample(10_000)
val sampleMoves = Option(Sample(200_000))
2020-05-05 22:11:15 -06:00
// Optional stages reused by the metric-specific stage lists further down.
// They are wrapped in Option so they can sit in the same list as the
// conditionally-present stages and be flattened together.
val unwindMoves = Option(UnwindField(F.moves))
val sortNb = Option(Sort(Descending("nb")))

/** Optional stage keeping at most `nb` result documents. */
def limit(nb: Int) = Option(Limit(nb))
2019-12-03 12:15:15 -07:00
2020-05-05 22:11:15 -06:00
// Second grouping pass used by `groupMulti`: documents keyed by
// (dimension, metric) are regrouped by dimension only. "nb" sums the
// per-metric counts, "ids" keeps the first ids array encountered, and
// "stack" collects one {metric, v} entry per metric value.
val regroupStacked = {
  val stackEntry = $doc("metric" -> "$_id.metric", "v" -> "$v")
  GroupField("_id.dimension")(
    "nb"    -> SumField("v"),
    "ids"   -> FirstField("ids"),
    "stack" -> Push(stackEntry)
  )
}
2020-05-05 22:11:15 -06:00
// Nested $cond expression mapping a move's "t" field to a MovetimeRange id.
// Each range in `reversedNoInf` wraps the expression built so far, so the
// last range folded becomes the outermost (first evaluated) test; a move
// matching no `$lt` test falls through to the MTRInf id.
lazy val movetimeIdDispatcher = {
  val moveTimeField = "$" + F.moves("t")
  MovetimeRange.reversedNoInf.foldLeft[BSONValue](BSONInteger(MovetimeRange.MTRInf.id)) {
    (fallback, range) =>
      val belowLimit = $doc("$lt" -> $arr(moveTimeField, range.tenths))
      $doc("$cond" -> $arr(belowLimit, range.id, fallback))
  }
}
// Nested $cond expression mapping a move's "c" field to a CplRange bucket.
// Buckets are identified by their `cpl` value; a move matching no `$lte`
// test falls through to `CplRange.worse.cpl`.
lazy val cplIdDispatcher = {
  val cplField = "$" + F.moves("c")
  CplRange.all.reverse.foldLeft[BSONValue](BSONInteger(CplRange.worse.cpl)) { (fallback, range) =>
    val withinBucket = $doc("$lte" -> $arr(cplField, range.cpl))
    $doc("$cond" -> $arr(withinBucket, range.cpl, fallback))
  }
}
2020-05-05 22:11:15 -06:00
// $cond expression mapping a move's "i" field to a MaterialRange id.
// A value of exactly 0 maps to the Equal range; otherwise the nested
// conditions built from `reversedButEqualAndLast` are tried, using a strict
// `$lt` for ranges flagged `negative` and `$lte` for the others, with Up4 as
// the final fallback.
lazy val materialIdDispatcher = {
  val imbalanceField = "$" + F.moves("i")
  val unequalRangeId =
    MaterialRange.reversedButEqualAndLast.foldLeft[BSONValue](BSONInteger(MaterialRange.Up4.id)) {
      (fallback, range) =>
        val comparison = if (range.negative) "$lt" else "$lte"
        $doc(
          "$cond" -> $arr(
            $doc(comparison -> $arr(imbalanceField, range.imbalance)),
            range.id,
            fallback
          )
        )
    }
  $doc(
    "$cond" -> $arr(
      $doc("$eq" -> $arr(imbalanceField, 0)),
      MaterialRange.Equal.id,
      unequalRangeId
    )
  )
}
// Nested $cond expression mapping a move's "e" field to an EvalRange id.
// Each range in `reversedButLast` contributes one strict `$lt` test; a move
// matching none of them falls through to the Up5 id.
lazy val evalIdDispatcher = {
  val evalField = "$" + F.moves("e")
  EvalRange.reversedButLast.foldLeft[BSONValue](BSONInteger(EvalRange.Up5.id)) { (fallback, range) =>
    val belowLimit = $doc("$lt" -> $arr(evalField, range.eval))
    $doc("$cond" -> $arr(belowLimit, range.id, fallback))
  }
}
2020-05-28 12:47:06 -06:00
// Nested $cond expression mapping a move's "v" field to a TimeVariance
// bucket, identified by its `intFactored` value. The first element of the
// reversed list is skipped and VeryVariable is used as the fallback instead.
lazy val timeVarianceIdDispatcher = {
  val varianceField = "$" + F.moves("v")
  val bucketsButFirstReversed = TimeVariance.all.reverse.drop(1)
  bucketsButFirstReversed.foldLeft[BSONValue](BSONInteger(TimeVariance.VeryVariable.intFactored)) {
    (fallback, bucket) =>
      val withinBucket = $doc("$lte" -> $arr(varianceField, bucket.intFactored))
      $doc("$cond" -> $arr(withinBucket, bucket.intFactored, fallback))
  }
}
2020-05-05 22:11:15 -06:00
/** Expression used as the grouping key for the given dimension.
  *
  * Range-like dimensions use their dispatcher expression, which computes the
  * bucket from the move's raw value; every other dimension is grouped
  * directly on its stored field, referenced as "$" followed by its db key.
  */
def dimensionGroupId(dim: Dimension[_]): BSONValue =
  dim match {
    case D.MovetimeRange => movetimeIdDispatcher
    case D.CplRange      => cplIdDispatcher
    case D.MaterialRange => materialIdDispatcher
    case D.EvalRange     => evalIdDispatcher
    case D.TimeVariance  => timeVarianceIdDispatcher
    case stored          => BSONString(s"$$${stored.dbKey}")
  }
// How the documents of a dimension are grouped: either a plain group stage
// on the dimension's group id, or an automatic bucketing into a fixed number
// of buckets (see `group` and `groupMulti`, which match on these cases).
sealed trait Grouping
object Grouping {
// Plain grouping on the dimension's group id.
object Group extends Grouping
// Automatic bucketing into `buckets` buckets; `granularity` is passed through
// unchanged to the BucketAuto stage.
case class BucketAuto(buckets: Int, granularity: Option[String] = None) extends Grouping
}
/** Grouping strategy for a dimension: the Date dimension is auto-bucketed
  * into 12 buckets, every other dimension uses a plain group.
  */
def dimensionGrouping(dim: Dimension[_]): Grouping =
  if (D.Date == dim) Grouping.BucketAuto(buckets = 12)
  else Grouping.Group
2017-04-10 12:08:44 -06:00
2020-05-05 22:11:15 -06:00
// Field rewrite that truncates the collected "ids" array to its first 4 entries.
val gameIdsSlice = {
  val firstFourIds = $doc("$slice" -> $arr("$ids", 4))
  $doc("ids" -> firstFourIds)
}
// Stage applying the "ids" truncation on its own.
val includeSomeGameIds = AddFields(gameIdsSlice)
// Field rewrite replacing "v" (an array of 0/1 values pushed by the
// flag-based metrics) with 100 times its average, i.e. a percentage.
val toPercent = {
  val meanOfV = $doc("$avg" -> "$v")
  $doc("v" -> $doc("$multiply" -> $arr(100, meanOfV)))
}
2020-05-05 22:11:15 -06:00
/** Single grouping stage for a single-valued metric.
  *
  * Groups (or auto-buckets, depending on `dimensionGrouping`) on the
  * dimension's group id and outputs:
  *   - "v":   the metric, aggregated with `f`
  *   - "nb":  the number of grouped documents
  *   - "ids": the set of grouped document `_id`s
  *
  * @param d the dimension to group on
  * @param f the accumulator producing the metric value
  * @return a one-element list holding the stage, wrapped in Option
  */
def group(d: Dimension[_], f: GroupFunction): List[Option[PipelineOperator]] = {
  val groupId = dimensionGroupId(d)
  // NOTE(review): the original carried the remark "AddFieldToSet crashes
  // mongodb 3.4.1 server" next to the bucketed variant — confirm whether it
  // still applies to the deployed server version.
  val outputs = Seq[(String, GroupFunction)](
    "v"   -> f,
    "nb"  -> SumAll,
    "ids" -> AddFieldToSet("_id")
  )
  val stage: PipelineOperator = dimensionGrouping(d) match {
    case Grouping.BucketAuto(buckets, granularity) =>
      BucketAuto(groupId, buckets, granularity)(outputs: _*)
    case Grouping.Group =>
      Group(groupId)(outputs: _*)
  }
  List(Option(stage))
}
/** Grouping stages for a stacked (multi-valued) metric.
  *
  * Documents are first counted per (dimension, metric value) pair, then
  * regrouped per dimension by `regroupStacked`, and finally the ids array is
  * truncated by `includeSomeGameIds`. For auto-bucketed dimensions the
  * bucketing happens first: each bucket pushes {id, metric} entries that are
  * unwound again before counting, and the result is sorted on "_id.min".
  *
  * @param d           the dimension to group on
  * @param metricDbKey db key of the field holding the metric value
  * @return the stages in execution order, each wrapped in Option
  */
def groupMulti(d: Dimension[_], metricDbKey: String): List[Option[PipelineOperator]] = {
  val groupId   = dimensionGroupId(d)
  val metricRef = s"$$$metricDbKey"
  val stages: List[PipelineOperator] = dimensionGrouping(d) match {
    case Grouping.Group =>
      val countPerPair =
        Group($doc("dimension" -> groupId, "metric" -> metricRef))(
          "v"   -> SumAll,
          "ids" -> AddFieldToSet("_id")
        )
      List(countPerPair, regroupStacked, includeSomeGameIds)
    case Grouping.BucketAuto(buckets, granularity) =>
      val bucketEntry = $doc(
        "id"     -> "$_id",
        "metric" -> metricRef
      )
      val bucketize = BucketAuto(groupId, buckets, granularity)("doc" -> Push(bucketEntry))
      val countPerPair =
        Group($doc("dimension" -> "$_id", "metric" -> "$doc.metric"))(
          "v"   -> SumAll,
          "ids" -> AddFieldToSet("doc.id")
        )
      List(
        bucketize,
        UnwindField("doc"),
        countPerPair,
        regroupStacked,
        includeSomeGameIds,
        Sort(Ascending("_id.min"))
      )
  }
  stages.map(Option(_))
}
2020-05-05 22:11:15 -06:00
// Combined selector of all question filters whose dimension is game-level
// (`isInGame`); it is merged into the initial Match stage of the pipeline.
val gameMatcher = {
  val gameLevelMatchers = question.filters.filter(_.dimension.isInGame).map(_.matcher)
  combineDocs(gameLevelMatchers)
}
2020-07-19 08:55:19 -06:00
/** Optional Match stage applied to individual (unwound) moves.
  *
  * Combines, in this order: the caller's `extraMatcher`, the matchers of all
  * move-level filters (`isInMove`), and — for the TimeVariance, CplRange and
  * EvalRange dimensions — a check that the move field backing the dimension
  * exists.
  *
  * @param extraMatcher additional selector required by the metric (empty by default)
  * @return the Match stage, or None when the combined selector is empty
  */
def matchMoves(extraMatcher: Bdoc = $empty): Option[PipelineOperator] = {
  val moveFilterMatchers = question.filters.collect {
    case f if f.dimension.isInMove => f.matcher
  }
  val dimensionFieldPresent = dimension match {
    case D.TimeVariance => List($doc(F.moves("v") $exists true))
    case D.CplRange     => List($doc(F.moves("c") $exists true))
    case D.EvalRange    => List($doc(F.moves("e") $exists true))
    case _              => List.empty[Bdoc]
  }
  val selector = combineDocs(extraMatcher :: moveFilterMatchers ::: dimensionFieldPresent)
  if (selector.isEmpty) None else Some(Match(selector))
}
/** Projection stage keeping only the fields the move-level stages read:
  * the metric's key, the dimension's key, and the key of every move-level
  * filter dimension (duplicates removed).
  */
def projectForMove: Option[PipelineOperator] = {
  val moveFilterKeys = filters.collect {
    case lila.insight.Filter(d, _) if d.isInMove => d.dbKey
  }
  val keptKeys = (metric.dbKey :: dimension.dbKey :: moveFilterKeys).distinct
  Option(Project(BSONDocument(keptKeys.map(_ -> BSONBoolean(true)))))
}
2020-05-27 12:41:50 -06:00
// The full aggregation: an initial Match stage selecting the user's games,
// paired with the list of follow-up stages. The follow-up list always starts
// with `sampleGames`, continues with the metric-specific stages, and ends
// with the dimension-specific stages.
val pipeline = {
  // --- initial game selection -------------------------------------------
  // Opening dimension: only games that have an eco field.
  val withOpening = (dimension == Dimension.Opening).??($doc(F.eco $exists true))
  // Metrics/dimensions that need analysis: only analysed games.
  val analysedOnly =
    (Metric.requiresAnalysis(metric) || Dimension.requiresAnalysis(dimension))
      .??($doc(F.analysed -> true))
  // Metrics/dimensions that need a stable rating: exclude provisional games.
  val stableRatingOnly =
    (Metric.requiresStableRating(metric) || Dimension.requiresStableRating(dimension)).?? {
      $doc(F.provisional $ne true)
    }
  val gameSelector =
    selectUserId(user.id) ++ gameMatcher ++ withOpening ++ analysedOnly ++ stableRatingOnly

  // --- helpers shared by the metric cases --------------------------------
  // Stages every move-level metric starts with: project, unwind, match, sample.
  def perMove(extraMatcher: Bdoc = $empty): List[Option[PipelineOperator]] =
    List(projectForMove, unwindMoves, matchMoves(extraMatcher), sampleMoves)
  // Selector requiring the given move field to exist.
  def movePresent(key: String): Bdoc = $doc(F.moves(key) $exists true)
  // Accumulator pushing 1 or 0 depending on the given boolean move field.
  def pushFlag(key: String): GroupFunction =
    GroupFunction("$push", $doc("$cond" -> $arr("$" + F.moves(key), 1, 0)))
  // Accumulator averaging the given move field divided by `divisor`.
  def avgDividedBy(key: String, divisor: Int): GroupFunction =
    GroupFunction("$avg", $doc("$divide" -> $arr("$" + F.moves(key), divisor)))
  // Final stage truncating the ids array.
  def keepSomeIds: List[Option[PipelineOperator]] = List(Option(includeSomeGameIds))
  // Final stage truncating the ids array and turning "v" into a percentage.
  def idsAndPercent: List[Option[PipelineOperator]] =
    List(Option(AddFields(gameIdsSlice ++ toPercent)))

  // --- metric-specific stages ---------------------------------------------
  val metricStages: List[Option[PipelineOperator]] = metric match {
    case M.MeanCpl =>
      perMove() ::: group(dimension, AvgField(F.moves("c"))) ::: keepSomeIds
    case M.CplBucket =>
      perMove() :::
        List[Option[PipelineOperator]](Option(AddFields($doc("cplBucket" -> cplIdDispatcher)))) :::
        groupMulti(dimension, "cplBucket")
    case M.Material =>
      perMove() ::: group(dimension, AvgField(F.moves("i"))) ::: keepSomeIds
    case M.Opportunism =>
      perMove(movePresent("o")) ::: group(dimension, pushFlag("o")) ::: idsAndPercent
    case M.Luck =>
      perMove(movePresent("l")) ::: group(dimension, pushFlag("l")) ::: idsAndPercent
    case M.Blurs =>
      perMove() ::: group(dimension, pushFlag("b")) ::: idsAndPercent
    case M.NbMoves =>
      // "v" counts moves and "nb" is recomputed as the number of distinct
      // ids, so v / nb is the average number of moves per grouped id.
      perMove() :::
        group(dimension, SumAll) :::
        List[Option[PipelineOperator]](
          Option(
            Project(
              $doc(
                "v"   -> true,
                "ids" -> true,
                "nb"  -> $doc("$size" -> "$ids")
              )
            )
          ),
          Option(
            AddFields(
              $doc("v" -> $doc("$divide" -> $arr("$v", "$nb"))) ++
                gameIdsSlice
            )
          )
        )
    case M.Movetime =>
      perMove() ::: group(dimension, avgDividedBy("t", 10)) ::: keepSomeIds
    case M.RatingDiff =>
      group(dimension, AvgField(F.ratingDiff)) ::: keepSomeIds
    case M.OpponentRating =>
      group(dimension, AvgField(F.opponentRating)) ::: keepSomeIds
    case M.Result =>
      groupMulti(dimension, F.result)
    case M.Termination =>
      groupMulti(dimension, F.termination)
    case M.PieceRole =>
      perMove() ::: groupMulti(dimension, F.moves("r"))
    case M.TimeVariance =>
      perMove(movePresent("v")) :::
        group(dimension, avgDividedBy("v", TimeVariance.intFactor)) :::
        keepSomeIds
  }

  // --- dimension-specific trailing stages ---------------------------------
  // Openings are sorted by descending "nb" and limited to 12 results.
  val dimensionStages: List[Option[PipelineOperator]] = dimension match {
    case D.Opening => List(sortNb, limit(12))
    case _         => Nil
  }

  Match(gameSelector) -> /* sortDate :: */ {
    sampleGames :: (metricStages ::: dimensionStages).flatten
  }
}
2020-05-27 12:41:50 -06:00
pipeline
2019-12-13 07:30:20 -07:00
}
2019-12-03 12:15:15 -07:00
}
}