fix cdf for values near the last centroid
- Id
- 3a757a11b66ec1cbba69b9c3fc37119ff2718550
- Author
- Jeff Wendling
- Commit time
- 2018-01-18T13:53:17-07:00
Modified tdigest.go
aMean := t.summary.Mean(aIdx)
if value < aMean+right {
aCount := float64(t.summary.Count(aIdx))
- return (tot + aCount*interpolate(value, aMean-left, aMean+right)) / 2
+ return (tot + aCount*interpolate(value, aMean-left, aMean+right)) / float64(t.Count())
}
return 1
}
Modified tdigest_test.go
}
}
+func TestCDFInsideLastCentroid(t *testing.T) {
+ // Regression test: CDF must never report more than 1 for a value near the last centroid. The fixture values were pulled from a live digest. Sorry it's a lot!
+ td := &TDigest{
+ summary: &summary{
+ means: []float64{2120.75048828125, 2260.3844299316406, 3900.490264892578, 3937.495807647705, 5390.479816436768, 10450.335285186768, 14152.897296905518, 16442.676349639893, 24303.143146514893, 56961.87361526489, 63891.24959182739, 73982.55232620239, 86477.50447463989, 110746.62556838989, 175479.7388496399, 300492.3404121399, 440452.5279121399, 515611.7700996399, 535827.0025215149, 546241.6822090149, 556965.3648262024, 569791.2124824524, 587320.6870918274, 603969.4175605774, 613751.6177558899, 624708.7593574524, 635060.0718574524, 641924.2007637024, 650656.4302558899, 660653.1714668274, 671380.9009590149, 687094.3667793274, 716595.8824043274, 740870.9800605774, 760276.2437324524, 768857.5786933899, 775021.0025215149, 787686.0337715149, 801473.4624824524, 815225.1255683899, 832358.6997871399, 852438.4751777649, 866134.2935371399, 1.10661549666214e+06, 1.1212118980293274e+06, 1.2230108433418274e+06, 1.5446490620918274e+06, 4.306712312091827e+06, 5.487582562091827e+06, 6.306383562091827e+06, 7.089308312091827e+06, 7.520797593341827e+06},
+ counts: []uint32{0x1, 0x1, 0x1, 0x1, 0x1, 0x2, 0x1, 0x4, 0x5, 0x6, 0x3, 0x3, 0x4, 0x11, 0x23, 0x2f, 0x1e, 0x1b, 0x36, 0x31, 0x33, 0x4e, 0x5f, 0x61, 0x48, 0x2e, 0x26, 0x28, 0x2a, 0x31, 0x39, 0x51, 0x32, 0x2b, 0x12, 0x8, 0xb, 0xa, 0x11, 0xa, 0x11, 0x9, 0x7, 0x1, 0x1, 0x1, 0x3, 0x2, 0x1, 0x1, 0x1, 0x1},
+ },
+ compression: 5,
+ count: 1250, // equals the sum of the counts listed above
+ rng: &globalRNG{},
+ }
+ td.summary.rebuildFenwickTree() // NOTE(review): presumably needed because the summary was filled in by hand above — confirm
+
+ if cdf := td.CDF(7.144560976650238e+06); cdf > 1 { // probe value lies between the last two means
+ t.Fatalf("invalid: %v", cdf)
+ }
+}
+
func benchmarkAdd(compression uint32, b *testing.B) {
t := uncheckedNew(Compression(compression))