Introduce a parameter-less New()
Now `tdigest.New()` gives a sane, ready-to-use-in-most-cases digest. Configuration should be done via self-referential functions. Ex: `tdigest.New(tdigest.Compression(200))` creates a digest with a compression of 200. Notice that New() can still panic, which means that deserialization is still more dangerous than it should be.
- Id
- e3cc95f78bce83e7f57ca929f9fede9aa63f1a21
- Author
- Caio
- Commit time
- 2017-10-25T14:19:04+02:00
Modified README.md
)
func main() {
- var t = tdigest.New(100)
+ var t = tdigest.New()
for i := 0; i < 10000; i++ {
t.Add(rand.Float64(), 1)
Modified serialization.go
return nil, err
}
- t := New(compression)
+ t := New(Compression(uint32(compression)))
var numCentroids int32
err = binary.Read(buf, endianess, &numCentroids)
Modified serialization_test.go
}
func TestSerialization(t *testing.T) {
- // NOTE Using a high compression value and adding few items
- // so we don't end up compressing automatically
- t1 := New(100)
+ t1 := New()
for i := 0; i < 100; i++ {
_ = t1.Add(rand.Float64(), 1)
}
Modified tdigest.go
}
// New creates a new digest.
-// The compression parameter rules the threshold in which samples are
-// merged together - the more often distinct samples are merged the more
-// precision is lost. Compression should be tuned according to your data
-// distribution, but a value of 100 is often good enough. A higher
-// compression value means holding more centroids in memory (thus: better
-// precision), which means a bigger serialization payload and higher
-// memory footprint.
-// Compression must be a value greater of equal to 1, will panic
-// otherwise.
-func New(compression float64) *TDigest {
- if compression < 1 {
- panic("Compression must be >= 1.0")
- }
- return &TDigest{
- compression: compression,
- summary: newSummary(estimateCapacity(compression)),
+//
+// By default the digest is constructed with a configuration that
+// should be useful for most use-cases.
+func New(options ...tdigestOption) *TDigest {
+ tdigest := &TDigest{
+ compression: 100,
count: 0,
}
+
+ for _, option := range options {
+ option(tdigest)
+ }
+
+ tdigest.summary = newSummary(estimateCapacity(tdigest.compression))
+ return tdigest
}
func _quantile(index float64, previousIndex float64, nextIndex float64, previousMean float64, nextMean float64) float64 {
Modified tdigest_test.go
// for all tests. So, no test concurrency here.
func TestTInternals(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
if !math.IsNaN(tdigest.Quantile(0.1)) {
t.Errorf("Quantile() on an empty digest should return NaN. Got: %.4f", tdigest.Quantile(0.1))
}
func TestUniformDistribution(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
for i := 0; i < 100000; i++ {
_ = tdigest.Add(rand.Float64(), 1)
}
func TestSequentialInsertion(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
data := make([]float64, 10000)
for i := 0; i < len(data); i++ {
}
func TestNonSequentialInsertion(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
// Not quite a uniform distribution, but close.
data := make([]float64, 1000)
}
func TestSingletonInACrowd(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
for i := 0; i < 10000; i++ {
tdigest.Add(10, 1)
}
}
func TestRespectBounds(t *testing.T) {
- tdigest := New(10)
+ tdigest := New(Compression(10))
data := []float64{0, 279, 2, 281}
for _, f := range data {
}
func TestWeights(t *testing.T) {
- tdigest := New(10)
+ tdigest := New(Compression(10))
// Create data slice with repeats matching weights we gave to tdigest
data := []float64{}
}
func TestIntegers(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
_ = tdigest.Add(1, 1)
_ = tdigest.Add(2, 1)
t.Errorf("Expected p(0.5) = 2, Got %.2f instead", tdigest.Quantile(0.5))
}
- tdigest = New(100)
+ tdigest = New()
for _, i := range []float64{1, 2, 2, 2, 2, 2, 2, 2, 3} {
_ = tdigest.Add(i, 1)
subs := make([]*TDigest, numSubs)
for i := 0; i < numSubs; i++ {
- subs[i] = New(100)
+ subs[i] = New()
}
- dist := New(100)
+ dist := New()
for i := 0; i < numItems; i++ {
num := rand.Float64()
dist.Compress()
- dist2 := New(100)
+ dist2 := New()
for i := 0; i < numSubs; i++ {
dist2.Merge(subs[i])
}
}
func TestCompressDoesntChangeCount(t *testing.T) {
- tdigest := New(100)
+ tdigest := New()
for i := 0; i < 1000; i++ {
_ = tdigest.Add(rand.Float64(), 1)
}
func TestPanic(t *testing.T) {
- shouldPanic(func() {
- New(0.5)
- }, t, "Compression < 1 should panic!")
-
- tdigest := New(100)
+ tdigest := New()
shouldPanic(func() {
tdigest.Quantile(-42)
}
func TestForEachCentroid(t *testing.T) {
- tdigest := New(10)
+ tdigest := New(Compression(10))
for i := 0; i < 100; i++ {
_ = tdigest.Add(float64(i), 1)
}
}
-func benchmarkAdd(compression float64, b *testing.B) {
- t := New(compression)
+func benchmarkAdd(compression uint32, b *testing.B) {
+ t := New(Compression(compression))
data := make([]float64, b.N)
for n := 0; n < b.N; n++ {
Created options.go
+package tdigest
+
+type tdigestOption func(*TDigest)
+
+// Compression sets the digest compression
+//
+// The compression parameter rules the threshold in which samples are
+// merged together - the more often distinct samples are merged the more
+// precision is lost. Compression should be tuned according to your data
+// distribution, but a value of 100 (the default) is often good enough.
+//
+// A higher compression value means holding more centroids in memory
+// (thus: better precision), which means a bigger serialization payload,
+// higher memory footprint and slower addition of new samples.
+//
+// Compression must be a value greater than or equal to 1; it will panic
+// otherwise.
+func Compression(compression uint32) tdigestOption {
+ if compression < 1 {
+ panic("Compression should be >= 1")
+ }
+ return func(t *TDigest) {
+ t.compression = float64(compression)
+ }
+}
Created options_test.go
+package tdigest
+
+import "testing"
+
+func TestDefaults(t *testing.T) {
+ digest := New()
+
+ if digest.compression != 100 {
+ t.Errorf("The default compression should be 100")
+ }
+}
+
+func TestCompression(t *testing.T) {
+ if New(Compression(40)).compression != 40 {
+ t.Errorf("The compression option should change the new digest compression")
+ }
+
+ shouldPanic(func() { Compression(0) }, t, "Compression < 1 should panic")
+}