1
0
Fork 0

Add passive health checks

This commit is contained in:
Nelson Isioma 2025-08-21 10:40:06 +01:00 committed by GitHub
parent c20802b07e
commit fc0fac8543
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 696 additions and 6 deletions

View file

@ -1,19 +1,24 @@
package healthcheck
import (
"bufio"
"context"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptrace"
"net/url"
"strconv"
"sync"
"time"
gokitmetrics "github.com/go-kit/kit/metrics"
"github.com/rs/zerolog/log"
ptypes "github.com/traefik/paerser/types"
"github.com/traefik/traefik/v3/pkg/config/dynamic"
"github.com/traefik/traefik/v3/pkg/config/runtime"
"golang.org/x/sync/singleflight"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
@ -322,3 +327,160 @@ func (shc *ServiceHealthChecker) checkHealthGRPC(ctx context.Context, serverURL
return nil
}
type PassiveServiceHealthChecker struct {
serviceName string
balancer StatusSetter
metrics metricsHealthCheck
maxFailedAttempts int
failureWindow ptypes.Duration
hasActiveHealthCheck bool
failuresMu sync.RWMutex
failures map[string][]time.Time
timersGroup singleflight.Group
timers sync.Map
}
// NewPassiveHealthChecker builds a PassiveServiceHealthChecker for the service
// named serviceName.
//
// balancer receives the status updates, maxFailedAttempts is the number of
// failures inside failureWindow at which a target is considered unhealthy, and
// hasActiveHealthCheck tells the checker that restoring the status is left to
// an active health check. metrics is used to publish the server-up gauge.
func NewPassiveHealthChecker(serviceName string, balancer StatusSetter, maxFailedAttempts int, failureWindow ptypes.Duration, hasActiveHealthCheck bool, metrics metricsHealthCheck) *PassiveServiceHealthChecker {
	checker := new(PassiveServiceHealthChecker)

	checker.serviceName = serviceName
	checker.balancer = balancer
	checker.metrics = metrics

	checker.maxFailedAttempts = maxFailedAttempts
	checker.failureWindow = failureWindow
	checker.hasActiveHealthCheck = hasActiveHealthCheck

	// The failure history must be allocated up front: it is written to on
	// every handled request.
	checker.failures = make(map[string][]time.Time)

	return checker
}
// WrapHandler returns a handler that forwards each request to next and uses the
// outcome to track the passive health of the server identified by targetURL.
//
// A request counts as a success when the client trace reports that the request
// was written to the backend and the recorded status code is below 500; a
// success clears the failure history of the target. Any other outcome is
// recorded as a failure. Once the number of failures inside the failure window
// reaches maxFailedAttempts, the target is marked down on the balancer and,
// unless an active health check owns the status, marked up again once
// failureWindow has elapsed.
//
// ctx bounds the lifetime of the recovery goroutines and is the context passed
// to the balancer status updates.
func (p *PassiveServiceHealthChecker) WrapHandler(ctx context.Context, next http.Handler, targetURL string) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
		// httptrace hooks may be invoked concurrently from other goroutines, and
		// even after the request has completed, so the flag is guarded by a mutex
		// instead of being a bare local variable.
		var (
			backendMu     sync.Mutex
			backendCalled bool
		)
		markBackendCalled := func() {
			backendMu.Lock()
			backendCalled = true
			backendMu.Unlock()
		}

		trace := &httptrace.ClientTrace{
			WroteHeaders: markBackendCalled,
			WroteRequest: func(httptrace.WroteRequestInfo) {
				markBackendCalled()
			},
		}
		clientTraceCtx := httptrace.WithClientTrace(req.Context(), trace)

		catcher := &codeCatcher{
			ResponseWriter: rw,
		}

		next.ServeHTTP(catcher, req.WithContext(clientTraceCtx))

		backendMu.Lock()
		called := backendCalled
		backendMu.Unlock()

		if called && catcher.statusCode < http.StatusInternalServerError {
			// Successful exchange with the backend: forget previous failures.
			p.failuresMu.Lock()
			p.failures[targetURL] = nil
			p.failuresMu.Unlock()
			return
		}

		p.failuresMu.Lock()
		p.failures[targetURL] = append(p.failures[targetURL], time.Now())
		p.failuresMu.Unlock()

		if p.healthy(targetURL) {
			return
		}

		// We need to guarantee that only one goroutine (request) will update the status and create a timer for the target.
		_, _, _ = p.timersGroup.Do(targetURL, func() (interface{}, error) {
			// A timer is already running for this target;
			// it means that the target is already considered unhealthy.
			if _, ok := p.timers.Load(targetURL); ok {
				return nil, nil
			}

			p.balancer.SetStatus(ctx, targetURL, false)
			p.metrics.ServiceServerUpGauge().With("service", p.serviceName, "url", targetURL).Set(0)

			// If the service has an active health check, the passive health checker should not reset the status.
			// The active health check will handle the status updates.
			if p.hasActiveHealthCheck {
				return nil, nil
			}

			// The timer must be registered before this function returns.
			// singleflight only deduplicates calls that overlap in time, so if the
			// registration were left to the goroutine below, a later failing request
			// could run this function before the timer is stored, mark the target
			// down a second time and start a second recovery goroutine.
			timer := time.NewTimer(time.Duration(p.failureWindow))
			p.timers.Store(targetURL, timer)

			go func() {
				defer timer.Stop()

				select {
				case <-ctx.Done():
				case <-timer.C:
					p.timers.Delete(targetURL)

					p.balancer.SetStatus(ctx, targetURL, true)
					p.metrics.ServiceServerUpGauge().With("service", p.serviceName, "url", targetURL).Set(1)
				}
			}()

			return nil, nil
		})
	})
}
// healthy reports whether the target identified by targetURL has fewer than
// maxFailedAttempts failures recorded within the sliding failure window.
//
// As a side effect, failures older than the window are dropped from the
// history of the target.
func (p *PassiveServiceHealthChecker) healthy(targetURL string) bool {
	windowStart := time.Now().Add(-time.Duration(p.failureWindow))

	p.failuresMu.Lock()
	defer p.failuresMu.Unlock()

	// Failures are appended in chronological order, so everything before the
	// first timestamp inside the window is stale. Starting from len(failures)
	// makes sure the whole history is dropped when no failure is recent enough,
	// instead of leaving stale entries that would still be counted below.
	failures := p.failures[targetURL]
	firstRecent := len(failures)
	for i, t := range failures {
		if t.After(windowStart) {
			firstRecent = i
			break
		}
	}

	if firstRecent > 0 {
		failures = failures[firstRecent:]
		p.failures[targetURL] = failures
	}

	// Check if failures exceed maxFailedAttempts.
	return len(failures) < p.maxFailedAttempts
}
// codeCatcher decorates an http.ResponseWriter and remembers the status code
// of the response written through it.
type codeCatcher struct {
	http.ResponseWriter

	statusCode int
}

// WriteHeader records statusCode and forwards it to the wrapped writer.
// A later call overrides the recorded value: for the health check, the last
// status code written is the one that matters.
func (c *codeCatcher) WriteHeader(statusCode int) {
	c.statusCode = statusCode

	c.ResponseWriter.WriteHeader(statusCode)
}

// Write forwards bytes to the wrapped writer. When no status code has been
// recorded yet, or only an informational one (1xx), the recorded status code
// becomes http.StatusOK (200).
func (c *codeCatcher) Write(bytes []byte) (int, error) {
	if noFinalStatus := c.statusCode < http.StatusOK; noFinalStatus {
		c.statusCode = http.StatusOK
	}

	written, err := c.ResponseWriter.Write(bytes)
	return written, err
}

// Flush flushes the wrapped writer when it implements http.Flusher and does
// nothing otherwise.
func (c *codeCatcher) Flush() {
	flusher, ok := c.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}

	flusher.Flush()
}

// Hijack hands the connection over through the wrapped writer when it
// implements http.Hijacker, and returns an error otherwise.
func (c *codeCatcher) Hijack() (net.Conn, *bufio.ReadWriter, error) {
	hijacker, ok := c.ResponseWriter.(http.Hijacker)
	if !ok {
		return nil, nil, fmt.Errorf("not a hijacker: %T", c.ResponseWriter)
	}

	return hijacker.Hijack()
}