caio.co/de/go-tdigest

Introduce TDigest.Count()

Expose the count of samples publicly so that users can more
easily decide what to do when the digest has too many samples.
Id
cb481bebb91ca56ac26073038bb47c09a0812a01
Author
Caio
Commit time
2017-10-27T10:20:05+02:00

Modified serialization_test.go

@@ -35,7 +35,7

t2, _ := FromBytes(bytes.NewReader(serialized))

- if t1.count != t2.count || t1.summary.Len() != t2.summary.Len() || t1.compression != t2.compression {
+ if t1.Count() != t2.Count() || t1.summary.Len() != t2.summary.Len() || t1.compression != t2.compression {
t.Errorf("Deserialized to something different. t1=%v t2=%v serialized=%v", t1, t2, serialized)
}
}
@@ -71,8 +71,8
t.Fatalf(err.Error())
}

- if tdigest.count != 100000 {
- t.Fatalf("Expected deserialized t-digest to have a count of 100_000. Got %d", tdigest.count)
+ if tdigest.Count() != 100000 {
+ t.Fatalf("Expected deserialized t-digest to have a count of 100_000. Got %d", tdigest.Count())
}

assertDifferenceSmallerThan(tdigest, 0.5, 0.02, t)

Modified tdigest.go

@@ -184,6 +184,25
return err
}

+// Count returns the total number of samples this digest represents
+// (i.e.: how many times Add() was called on it plus all the counts of
+// other digests the current has merged with).
+//
+// This is useful mainly for two scenarios:
+//
+// 1. Knowing if there is enough data so you can trust the quantiles
+// 2. Knowing if you've registered too many samples already and
+// deciding what to do about it.
+//
+// For the second case one approach would be to create a side empty
+// digest and start registering samples on it as well as on the old
+// (big) one and then discard the bigger one after a certain criterion
+// is reached (say, minimum number of samples or a small relative
+// error between new and old digests).
+func (t TDigest) Count() uint64 {
+ return t.count
+}
+
// Add(x) is an alias for AddWeighted(x,1)
// Read the documentation for AddWeighted for more details.
func (t *TDigest) Add(value float64) error {

Modified tdigest_test.go

@@ -300,12 +300,12
dist2.Merge(subs[i])
}

- if dist.count != dist2.count {
- t.Errorf("Expected the number of centroids to be the same. %d != %d", dist.count, dist2.count)
+ if dist.Count() != dist2.Count() {
+ t.Errorf("Expected the number of centroids to be the same. %d != %d", dist.Count(), dist2.Count())
}

- if dist2.count != numItems {
- t.Errorf("Items shouldn't have disappeared. %d != %d", dist2.count, numItems)
+ if dist2.Count() != numItems {
+ t.Errorf("Items shouldn't have disappeared. %d != %d", dist2.Count(), numItems)
}

sort.Float64s(data)
@@ -337,15 +337,15
_ = tdigest.Add(rand.Float64())
}

- initialCount := tdigest.count
+ initialCount := tdigest.Count()

err := tdigest.Compress()
if err != nil {
t.Errorf("Compress() triggered an unexpected error: %s", err)
}

- if tdigest.count != initialCount {
- t.Errorf("Compress() should not change count. Wanted %d, got %d", initialCount, tdigest.count)
+ if tdigest.Count() != initialCount {
+ t.Errorf("Compress() should not change count. Wanted %d, got %d", initialCount, tdigest.Count())
}
}