Add method to compute stats (rowcount, size) of all db columns

pull/7/head
Martin Boehm 2018-06-01 16:01:58 +02:00
parent eba668bfd1
commit 4f42d2f1d6
3 changed files with 68 additions and 3 deletions

View File

@ -70,6 +70,8 @@ var (
coin = flag.String("coin", "btc", "coin name")
noTxCache = flag.Bool("notxcache", false, "disable tx cache")
computeColumnStats = flag.Bool("computedbstats", false, "compute column stats and exit")
)
var (
@ -162,13 +164,24 @@ func main() {
internalState, err = newInternalState(*coin, index)
if err != nil {
glog.Fatal("internalState: ", err)
glog.Error("internalState: ", err)
return
}
index.SetInternalState(internalState)
if internalState.DbState != common.DbStateClosed {
glog.Warning("internalState: database in not closed state ", internalState.DbState, ", possibly previous ungraceful shutdown")
}
if *computeColumnStats {
internalState.DbState = common.DbStateOpen
err = index.ComputeInternalStateColumnStats()
if err != nil {
glog.Error("internalState: ", err)
}
glog.Info("DB size on disk: ", index.DatabaseSizeOnDisk())
return
}
syncWorker, err = db.NewSyncWorker(index, chain, *syncWorkers, *syncChunk, *blockFrom, *dryRun, chanOsSignal, metrics, internalState)
if err != nil {
glog.Fatalf("NewSyncWorker %v", err)

View File

@ -105,6 +105,14 @@ func (is *InternalState) AddDBColumnStats(c int, rowsDiff int64, keysSumDiff int
is.DbColumns[c].ValuesSum += valuesSumDiff
}
// SetDBColumnStats overwrites the statistics stored for column c.
// Despite the "Diff" suffix of the parameter names, the values are assigned
// as-is (not added): rowsDiff becomes Rows, keysSumDiff becomes KeysSum and
// valuesSumDiff becomes ValuesSum. The update is done while holding is.mux.
func (is *InternalState) SetDBColumnStats(c int, rowsDiff, keysSumDiff, valuesSumDiff int64) {
	is.mux.Lock()
	defer is.mux.Unlock()

	is.DbColumns[c].Rows = rowsDiff
	is.DbColumns[c].KeysSum = keysSumDiff
	is.DbColumns[c].ValuesSum = valuesSumDiff
}
func (is *InternalState) Pack() ([]byte, error) {
is.mux.Lock()
defer is.mux.Unlock()

View File

@ -18,7 +18,7 @@ import (
// an iterator creates a snapshot, which takes a lot of resources
// when doing a huge scan, it is better to close and reopen the iterator from time to time to free the resources
const disconnectBlocksRefreshIterator = uint64(1000000)
const refreshIterator = 5000000
const packedHeightBytes = 4
// RepairRocksDB calls RocksDb db repair function
@ -686,7 +686,7 @@ func (d *RocksDB) allAddressesScan(lower uint32, higher uint32) ([][]byte, [][]b
it.Seek(seekKey)
it.Next()
}
for count = 0; it.Valid() && count < disconnectBlocksRefreshIterator; it.Next() {
for count = 0; it.Valid() && count < refreshIterator; it.Next() {
totalOutputs++
count++
key = it.Key().Data()
@ -932,6 +932,50 @@ func (d *RocksDB) StoreInternalState(is *common.InternalState) error {
return d.db.PutCF(d.wo, d.cfh[cfDefault], []byte(internalStateKey), buf)
}
// computeColumnSize iterates over every row of the column family with index col
// and returns, in this order, the number of rows, the sum of the key lengths in
// bytes and the sum of the value lengths in bytes.
//
// At most refreshIterator rows are read through a single iterator; after that
// the iterator is closed and a fresh one is positioned just behind the last
// counted key, and a progress line is logged.
//
// A non-nil error is returned when the iterator reports a failure; the numeric
// results are zero in that case.
func (d *RocksDB) computeColumnSize(col int) (int64, int64, int64, error) {
	var rows, keysSum, valuesSum int64
	var seekKey []byte
	for {
		it := d.db.NewIteratorCF(d.ro, d.cfh[col])
		if rows == 0 {
			it.SeekToFirst()
		} else {
			glog.Info("Column ", cfNames[col], ": rows ", rows, ", key bytes ", keysSum, ", value bytes ", valuesSum, ", in progress...")
			// resume right behind the last row counted by the previous iterator
			it.Seek(seekKey)
			it.Next()
		}
		more := false
		for count := 0; it.Valid(); it.Next() {
			key := it.Key().Data()
			rows++
			keysSum += int64(len(key))
			valuesSum += int64(len(it.Value().Data()))
			count++
			if count >= refreshIterator {
				// key points into iterator-owned memory which stops being valid
				// once the iterator moves, so copy it before any further Next
				seekKey = append(seekKey[:0], key...)
				more = true
				break
			}
		}
		// an iterator also becomes invalid on a read error; do not mistake that
		// for the end of the column
		err := it.Err()
		it.Close()
		if err != nil {
			return 0, 0, 0, err
		}
		if !more {
			break
		}
	}
	return rows, keysSum, valuesSum, nil
}
// ComputeInternalStateColumnStats walks through all columns listed in cfNames,
// computes the row count, key bytes and value bytes of each one using
// computeColumnSize, stores the result in the internal state and logs it.
// It stops and returns the error of the first column that fails.
// Can be a very slow operation.
func (d *RocksDB) ComputeInternalStateColumnStats() error {
	for c, name := range cfNames {
		rows, keysSum, valuesSum, err := d.computeColumnSize(c)
		if err != nil {
			return err
		}
		d.is.SetDBColumnStats(c, rows, keysSum, valuesSum)
		glog.Info("Column ", name, ": rows ", rows, ", key bytes ", keysSum, ", value bytes ", valuesSum)
	}
	return nil
}
// Helpers
func packAddressKey(addrID []byte, height uint32) []byte {