prober: only record latency for successful probes
This will make it easier to track probe latency on a dashboard.

Updates https://github.com/tailscale/corp/issues/9916

Signed-off-by: Anton Tolchanov <anton@tailscale.com>

pull/7654/head
parent
d92047cc30
commit
7083246409
|
@ -161,11 +161,12 @@ type Probe struct {
|
||||||
tick ticker
|
tick ticker
|
||||||
labels map[string]string
|
labels map[string]string
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
start time.Time // last time doProbe started
|
start time.Time // last time doProbe started
|
||||||
end time.Time // last time doProbe returned
|
end time.Time // last time doProbe returned
|
||||||
result bool // whether the last doProbe call succeeded
|
latency time.Duration // last successful probe latency
|
||||||
lastErr error
|
succeeded bool // whether the last doProbe call succeeded
|
||||||
|
lastErr error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close shuts down the Probe and unregisters it from its Prober.
|
// Close shuts down the Probe and unregisters it from its Prober.
|
||||||
|
@ -254,8 +255,13 @@ func (p *Probe) recordEnd(start time.Time, err error) {
|
||||||
p.mu.Lock()
|
p.mu.Lock()
|
||||||
defer p.mu.Unlock()
|
defer p.mu.Unlock()
|
||||||
p.end = end
|
p.end = end
|
||||||
p.result = err == nil
|
p.succeeded = err == nil
|
||||||
p.lastErr = err
|
p.lastErr = err
|
||||||
|
if p.succeeded {
|
||||||
|
p.latency = end.Sub(p.start)
|
||||||
|
} else {
|
||||||
|
p.latency = 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type varExporter struct {
|
type varExporter struct {
|
||||||
|
@ -289,13 +295,13 @@ func (v varExporter) probeInfo() map[string]ProbeInfo {
|
||||||
Labels: probe.labels,
|
Labels: probe.labels,
|
||||||
Start: probe.start,
|
Start: probe.start,
|
||||||
End: probe.end,
|
End: probe.end,
|
||||||
Result: probe.result,
|
Result: probe.succeeded,
|
||||||
}
|
}
|
||||||
if probe.lastErr != nil {
|
if probe.lastErr != nil {
|
||||||
inf.Error = probe.lastErr.Error()
|
inf.Error = probe.lastErr.Error()
|
||||||
}
|
}
|
||||||
if probe.end.After(probe.start) {
|
if probe.latency > 0 {
|
||||||
inf.Latency = probe.end.Sub(probe.start).String()
|
inf.Latency = probe.latency.String()
|
||||||
}
|
}
|
||||||
out[probe.name] = inf
|
out[probe.name] = inf
|
||||||
probe.mu.Unlock()
|
probe.mu.Unlock()
|
||||||
|
@ -358,9 +364,10 @@ func (v varExporter) WritePrometheus(w io.Writer, prefix string) {
|
||||||
}
|
}
|
||||||
if !probe.end.IsZero() {
|
if !probe.end.IsZero() {
|
||||||
fmt.Fprintf(w, "%s_end_secs{%s} %d\n", prefix, labels, probe.end.Unix())
|
fmt.Fprintf(w, "%s_end_secs{%s} %d\n", prefix, labels, probe.end.Unix())
|
||||||
// Start is always present if end is.
|
if probe.latency > 0 {
|
||||||
fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.end.Sub(probe.start).Milliseconds())
|
fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.latency.Milliseconds())
|
||||||
if probe.result {
|
}
|
||||||
|
if probe.succeeded {
|
||||||
fmt.Fprintf(w, "%s_result{%s} 1\n", prefix, labels)
|
fmt.Fprintf(w, "%s_result{%s} 1\n", prefix, labels)
|
||||||
} else {
|
} else {
|
||||||
fmt.Fprintf(w, "%s_result{%s} 0\n", prefix, labels)
|
fmt.Fprintf(w, "%s_result{%s} 0\n", prefix, labels)
|
||||||
|
|
|
@ -237,12 +237,11 @@ func TestExpvar(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
check("probe", ProbeInfo{
|
check("probe", ProbeInfo{
|
||||||
Labels: map[string]string{"label": "value"},
|
Labels: map[string]string{"label": "value"},
|
||||||
Start: epoch,
|
Start: epoch,
|
||||||
End: epoch.Add(aFewMillis),
|
End: epoch.Add(aFewMillis),
|
||||||
Latency: aFewMillis.String(),
|
Result: false,
|
||||||
Result: false,
|
Error: "failing, as instructed by test",
|
||||||
Error: "failing, as instructed by test",
|
|
||||||
})
|
})
|
||||||
|
|
||||||
succeed.Store(true)
|
succeed.Store(true)
|
||||||
|
@ -280,9 +279,8 @@ func TestPrometheus(t *testing.T) {
|
||||||
probe_interval_secs{name="testprobe",label="value"} %f
|
probe_interval_secs{name="testprobe",label="value"} %f
|
||||||
probe_start_secs{name="testprobe",label="value"} %d
|
probe_start_secs{name="testprobe",label="value"} %d
|
||||||
probe_end_secs{name="testprobe",label="value"} %d
|
probe_end_secs{name="testprobe",label="value"} %d
|
||||||
probe_latency_millis{name="testprobe",label="value"} %d
|
|
||||||
probe_result{name="testprobe",label="value"} 0
|
probe_result{name="testprobe",label="value"} 0
|
||||||
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix(), aFewMillis.Milliseconds()))
|
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()))
|
||||||
if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
|
if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
|
||||||
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
|
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue