Introduce TDigest.Count()
Expose the count of samples publicly so that users can more easily decide what to do when the digest has too many samples.
- Id
- cb481bebb91ca56ac26073038bb47c09a0812a01
- Author
- Caio
- Commit time
- 2017-10-27T10:20:05+02:00
Modified serialization_test.go
t2, _ := FromBytes(bytes.NewReader(serialized))
- if t1.count != t2.count || t1.summary.Len() != t2.summary.Len() || t1.compression != t2.compression {
+ if t1.Count() != t2.Count() || t1.summary.Len() != t2.summary.Len() || t1.compression != t2.compression {
t.Errorf("Deserialized to something different. t1=%v t2=%v serialized=%v", t1, t2, serialized)
}
}
t.Fatalf(err.Error())
}
- if tdigest.count != 100000 {
- t.Fatalf("Expected deserialized t-digest to have a count of 100_000. Got %d", tdigest.count)
+ if tdigest.Count() != 100000 {
+ t.Fatalf("Expected deserialized t-digest to have a count of 100_000. Got %d", tdigest.Count())
}
assertDifferenceSmallerThan(tdigest, 0.5, 0.02, t)
Modified tdigest.go
return err
}
+// Count returns the total number of samples this digest represents
+// (i.e.: how many times Add() was called on it plus all the counts of
+// other digests the current has merged with).
+//
+// This is useful mainly for two scenarios:
+//
+// 1. Knowing if there is enough data so you can trust the quantiles
+// 2. Knowing if you've registered too many samples already and
+// deciding what to do about it.
+//
+// For the second case one approach would be to create a side empty
+// digest and start registering samples on it as well as on the old
+// (big) one and then discard the bigger one after a certain criterion
+// is reached (say, minimum number of samples or a small relative
+// error between new and old digests).
+func (t TDigest) Count() uint64 {
+ return t.count
+}
+
// Add(x) is an alias for AddWeighted(x,1)
// Read the documentation for AddWeighted for more details.
func (t *TDigest) Add(value float64) error {
Modified tdigest_test.go
dist2.Merge(subs[i])
}
- if dist.count != dist2.count {
- t.Errorf("Expected the number of centroids to be the same. %d != %d", dist.count, dist2.count)
+ if dist.Count() != dist2.Count() {
+ t.Errorf("Expected the number of centroids to be the same. %d != %d", dist.Count(), dist2.Count())
}
- if dist2.count != numItems {
- t.Errorf("Items shouldn't have disappeared. %d != %d", dist2.count, numItems)
+ if dist2.Count() != numItems {
+ t.Errorf("Items shouldn't have disappeared. %d != %d", dist2.Count(), numItems)
}
sort.Float64s(data)
_ = tdigest.Add(rand.Float64())
}
- initialCount := tdigest.count
+ initialCount := tdigest.Count()
err := tdigest.Compress()
if err != nil {
t.Errorf("Compress() triggered an unexpected error: %s", err)
}
- if tdigest.count != initialCount {
- t.Errorf("Compress() should not change count. Wanted %d, got %d", initialCount, tdigest.count)
+ if tdigest.Count() != initialCount {
+ t.Errorf("Compress() should not change count. Wanted %d, got %d", initialCount, tdigest.Count())
}
}