ipn/ipnlocal: rate-limit diagnose logs in bugreport
We can log too quickly for logtail to catch up, even when we opt out of log rate-limiting. When the user passes the --diagnose flag to bugreport, we use a token bucket to control how many logs per second are printed and sleep until we're able to write more. Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Change-Id: If27672d66b621b589280bd0fe228de367ffcbd8fpull/7133/head
parent
3eae75b1b8
commit
d1146dc701
|
@ -4617,8 +4617,13 @@ func (b *LocalBackend) handleQuad100Port80Conn(w http.ResponseWriter, r *http.Re
|
|||
}
|
||||
|
||||
func (b *LocalBackend) Doctor(ctx context.Context, logf logger.Logf) {
|
||||
var checks []doctor.Check
|
||||
// We can write logs too fast for logtail to handle, even when
|
||||
// opting-out of rate limits. Limit ourselves to at most one message
|
||||
// per 20ms and a burst of 60 log lines, which should be fast enough to
|
||||
// not block for too long but slow enough that we can upload all lines.
|
||||
logf = logger.SlowLoggerWithClock(ctx, logf, 20*time.Millisecond, 60, time.Now)
|
||||
|
||||
var checks []doctor.Check
|
||||
checks = append(checks, routetable.Check{})
|
||||
|
||||
// TODO(andrew): more
|
||||
|
|
|
@ -228,6 +228,53 @@ func RateLimitedFnWithClock(logf Logf, f time.Duration, burst int, maxCache int,
|
|||
}
|
||||
}
|
||||
|
||||
// SlowLoggerWithClock is a logger that applies rate limits similar to
|
||||
// RateLimitedFnWithClock, but instead of dropping logs will sleep until they
|
||||
// can be written. This should only be used for debug logs, and not in a hot path.
|
||||
//
|
||||
// The provided context, if canceled, will cause all logs to be dropped and
|
||||
// prevent any sleeps.
|
||||
func SlowLoggerWithClock(ctx context.Context, logf Logf, f time.Duration, burst int, timeNow func() time.Time) Logf {
|
||||
var (
|
||||
mu sync.Mutex
|
||||
tb = newTokenBucket(f, burst, timeNow())
|
||||
)
|
||||
return func(format string, args ...any) {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Hold the mutex for the entire length of the check + log
|
||||
// since our token bucket isn't concurrency-safe.
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
tb.AdvanceTo(timeNow())
|
||||
|
||||
// If we can get a token, then do that and return.
|
||||
if tb.Get() {
|
||||
logf(format, args...)
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise, sleep for 2x the duration so that we don't
|
||||
// immediately sleep again on the next call.
|
||||
tmr := time.NewTimer(2 * f)
|
||||
defer tmr.Stop()
|
||||
select {
|
||||
case curr := <-tmr.C:
|
||||
tb.AdvanceTo(curr)
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
if !tb.Get() {
|
||||
log.Printf("[unexpected] error rate-limiting in SlowLoggerWithClock")
|
||||
return
|
||||
}
|
||||
logf(format, args...)
|
||||
}
|
||||
}
|
||||
|
||||
// LogOnChange logs a given line only if line != lastLine, or if maxInterval has passed
|
||||
// since the last time this identical line was logged.
|
||||
func LogOnChange(logf Logf, maxInterval time.Duration, timeNow func() time.Time) Logf {
|
||||
|
|
Loading…
Reference in New Issue