1
0
Fork 0

Add unhealthy Interval to the health check configuration

This commit is contained in:
Swastik Sarkar 2025-04-09 13:40:05 +05:30 committed by GitHub
parent 6c3b099c25
commit d7d0017545
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 701 additions and 295 deletions

View file

@ -299,17 +299,18 @@ type Server struct {
// ServerHealthCheck holds the HealthCheck configuration.
//
// Interval is the delay between two checks of a target that is currently
// healthy. UnhealthyInterval, when set, is the delay between two checks of a
// target that is currently unhealthy; it is a pointer so that "not set" (nil)
// can be told apart from an explicit value, in which case the health checker
// falls back to Interval. Timeout bounds a single health check request.
type ServerHealthCheck struct {
	Scheme            string            `json:"scheme,omitempty" toml:"scheme,omitempty" yaml:"scheme,omitempty" export:"true"`
	Mode              string            `json:"mode,omitempty" toml:"mode,omitempty" yaml:"mode,omitempty" export:"true"`
	Path              string            `json:"path,omitempty" toml:"path,omitempty" yaml:"path,omitempty" export:"true"`
	Method            string            `json:"method,omitempty" toml:"method,omitempty" yaml:"method,omitempty" export:"true"`
	Status            int               `json:"status,omitempty" toml:"status,omitempty" yaml:"status,omitempty" export:"true"`
	Port              int               `json:"port,omitempty" toml:"port,omitempty,omitzero" yaml:"port,omitempty" export:"true"`
	Interval          ptypes.Duration   `json:"interval,omitempty" toml:"interval,omitempty" yaml:"interval,omitempty" export:"true"`
	UnhealthyInterval *ptypes.Duration  `json:"unhealthyInterval,omitempty" toml:"unhealthyInterval,omitempty" yaml:"unhealthyInterval,omitempty" export:"true"`
	Timeout           ptypes.Duration   `json:"timeout,omitempty" toml:"timeout,omitempty" yaml:"timeout,omitempty" export:"true"`
	Hostname          string            `json:"hostname,omitempty" toml:"hostname,omitempty" yaml:"hostname,omitempty"`
	FollowRedirects   *bool             `json:"followRedirects,omitempty" toml:"followRedirects,omitempty" yaml:"followRedirects,omitempty" export:"true"`
	Headers           map[string]string `json:"headers,omitempty" toml:"headers,omitempty" yaml:"headers,omitempty" export:"true"`
}
// SetDefaults Default values for a HealthCheck.

View file

@ -1428,6 +1428,11 @@ func (in *Server) DeepCopy() *Server {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerHealthCheck) DeepCopyInto(out *ServerHealthCheck) {
*out = *in
if in.UnhealthyInterval != nil {
in, out := &in.UnhealthyInterval, &out.UnhealthyInterval
*out = new(paersertypes.Duration)
**out = **in
}
if in.FollowRedirects != nil {
in, out := &in.FollowRedirects, &out.FollowRedirects
*out = new(bool)

View file

@ -162,6 +162,7 @@ func TestDecodeConfiguration(t *testing.T) {
"traefik.http.services.Service0.loadbalancer.healthcheck.headers.name1": "foobar",
"traefik.http.services.Service0.loadbalancer.healthcheck.hostname": "foobar",
"traefik.http.services.Service0.loadbalancer.healthcheck.interval": "1s",
"traefik.http.services.Service0.loadbalancer.healthcheck.unhealthyinterval": "1s",
"traefik.http.services.Service0.loadbalancer.healthcheck.path": "foobar",
"traefik.http.services.Service0.loadbalancer.healthcheck.method": "foobar",
"traefik.http.services.Service0.loadbalancer.healthcheck.status": "401",
@ -186,6 +187,7 @@ func TestDecodeConfiguration(t *testing.T) {
"traefik.http.services.Service1.loadbalancer.healthcheck.headers.name1": "foobar",
"traefik.http.services.Service1.loadbalancer.healthcheck.hostname": "foobar",
"traefik.http.services.Service1.loadbalancer.healthcheck.interval": "1s",
"traefik.http.services.Service1.loadbalancer.healthcheck.unhealthyinterval": "1s",
"traefik.http.services.Service1.loadbalancer.healthcheck.path": "foobar",
"traefik.http.services.Service1.loadbalancer.healthcheck.method": "foobar",
"traefik.http.services.Service1.loadbalancer.healthcheck.status": "401",
@ -701,15 +703,16 @@ func TestDecodeConfiguration(t *testing.T) {
},
},
HealthCheck: &dynamic.ServerHealthCheck{
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
UnhealthyInterval: pointer(ptypes.Duration(time.Second)),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Headers: map[string]string{
"name0": "foobar",
"name1": "foobar",
@ -735,15 +738,16 @@ func TestDecodeConfiguration(t *testing.T) {
},
},
HealthCheck: &dynamic.ServerHealthCheck{
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
UnhealthyInterval: pointer(ptypes.Duration(time.Second)),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Headers: map[string]string{
"name0": "foobar",
"name1": "foobar",
@ -1244,14 +1248,15 @@ func TestEncodeConfiguration(t *testing.T) {
},
},
HealthCheck: &dynamic.ServerHealthCheck{
Scheme: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Scheme: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
UnhealthyInterval: pointer(ptypes.Duration(time.Second)),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Headers: map[string]string{
"name0": "foobar",
"name1": "foobar",
@ -1276,14 +1281,15 @@ func TestEncodeConfiguration(t *testing.T) {
},
},
HealthCheck: &dynamic.ServerHealthCheck{
Scheme: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Scheme: "foobar",
Path: "foobar",
Method: "foobar",
Status: 401,
Port: 42,
Interval: ptypes.Duration(time.Second),
UnhealthyInterval: pointer(ptypes.Duration(time.Second)),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
Headers: map[string]string{
"name0": "foobar",
"name1": "foobar",
@ -1471,6 +1477,7 @@ func TestEncodeConfiguration(t *testing.T) {
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Headers.name1": "foobar",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Hostname": "foobar",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Interval": "1000000000",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.UnhealthyInterval": "1000000000",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Path": "foobar",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Method": "foobar",
"traefik.HTTP.Services.Service0.LoadBalancer.HealthCheck.Status": "401",
@ -1495,6 +1502,7 @@ func TestEncodeConfiguration(t *testing.T) {
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Headers.name1": "foobar",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Hostname": "foobar",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Interval": "1000000000",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.UnhealthyInterval": "1000000000",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Path": "foobar",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Method": "foobar",
"traefik.HTTP.Services.Service1.LoadBalancer.HealthCheck.Status": "401",

View file

@ -40,18 +40,27 @@ type metricsHealthCheck interface {
ServiceServerUpGauge() gokitmetrics.Gauge
}
// target is one server to health check, as queued on the checker's
// healthy/unhealthy channels.
type target struct {
	// targetURL is the server URL handed to the health check request.
	targetURL *url.URL
	// name is the key the server was registered under in the targets map;
	// it is the identifier reported to the balancer via SetStatus.
	name string
}
// ServiceHealthChecker periodically checks the servers of one service and
// reports each result to the balancer, the runtime service info and the
// metrics gauge.
type ServiceHealthChecker struct {
	balancer StatusSetter
	info     *runtime.ServiceInfo

	config *dynamic.ServerHealthCheck
	// interval, unhealthyInterval and timeout hold the effective values
	// resolved by NewServiceHealthChecker (defaults applied when the
	// configured value is missing or not strictly positive).
	interval          time.Duration
	unhealthyInterval time.Duration
	timeout           time.Duration

	metrics metricsHealthCheck

	client *http.Client
	// NOTE(review): targets looks superseded by the two queues below;
	// confirm nothing else reads it before removing the field.
	targets map[string]*url.URL
	// healthyTargets and unhealthyTargets are buffered queues, each sized to
	// hold every target. After a check, a target is pushed back onto the
	// queue matching its outcome, so it is next probed at the matching interval.
	healthyTargets   chan target
	unhealthyTargets chan target

	serviceName string
}
@ -60,13 +69,26 @@ func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, co
interval := time.Duration(config.Interval)
if interval <= 0 {
logger.Error().Msg("Health check interval smaller than zero")
logger.Error().Msg("Health check interval smaller than zero, default value will be used instead.")
interval = time.Duration(dynamic.DefaultHealthCheckInterval)
}
// If the unhealthyInterval option is not set, we use the interval option value,
// to check the unhealthy targets as often as the healthy ones.
var unhealthyInterval time.Duration
if config.UnhealthyInterval == nil {
unhealthyInterval = interval
} else {
unhealthyInterval = time.Duration(*config.UnhealthyInterval)
if unhealthyInterval <= 0 {
logger.Error().Msg("Health check unhealthy interval smaller than zero, default value will be used instead.")
unhealthyInterval = time.Duration(dynamic.DefaultHealthCheckInterval)
}
}
timeout := time.Duration(config.Timeout)
if timeout <= 0 {
logger.Error().Msg("Health check timeout smaller than zero")
logger.Error().Msg("Health check timeout smaller than zero, default value will be used instead.")
timeout = time.Duration(dynamic.DefaultHealthCheckTimeout)
}
@ -80,21 +102,38 @@ func NewServiceHealthChecker(ctx context.Context, metrics metricsHealthCheck, co
}
}
healthyTargets := make(chan target, len(targets))
for name, targetURL := range targets {
healthyTargets <- target{
targetURL: targetURL,
name: name,
}
}
unhealthyTargets := make(chan target, len(targets))
return &ServiceHealthChecker{
balancer: service,
info: info,
config: config,
interval: interval,
timeout: timeout,
targets: targets,
serviceName: serviceName,
client: client,
metrics: metrics,
balancer: service,
info: info,
config: config,
interval: interval,
unhealthyInterval: unhealthyInterval,
timeout: timeout,
healthyTargets: healthyTargets,
unhealthyTargets: unhealthyTargets,
serviceName: serviceName,
client: client,
metrics: metrics,
}
}
// Launch runs the health check loops and blocks until the healthy-targets
// loop returns, which happens once ctx is done.
//
// Unhealthy targets are polled from a separate goroutine at unhealthyInterval,
// while healthy targets are polled in the calling goroutine at interval.
// Each loop creates and stops its own ticker, so none is created here.
func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
	go shc.healthcheck(ctx, shc.unhealthyTargets, shc.unhealthyInterval)

	shc.healthcheck(ctx, shc.healthyTargets, shc.interval)
}
func (shc *ServiceHealthChecker) healthcheck(ctx context.Context, targets chan target, interval time.Duration) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
for {
@ -103,7 +142,23 @@ func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
return
case <-ticker.C:
for proxyName, target := range shc.targets {
// We collect all the targets to check up front, in a single pass,
// to avoid rechecking a target that has been moved during the health check.
var targetsToCheck []target
hasMoreTargets := true
for hasMoreTargets {
select {
case <-ctx.Done():
return
case target := <-targets:
targetsToCheck = append(targetsToCheck, target)
default:
hasMoreTargets = false
}
}
// Now we can check the targets.
for _, target := range targetsToCheck {
select {
case <-ctx.Done():
return
@ -113,14 +168,14 @@ func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
up := true
serverUpMetricValue := float64(1)
if err := shc.executeHealthCheck(ctx, shc.config, target); err != nil {
if err := shc.executeHealthCheck(ctx, shc.config, target.targetURL); err != nil {
// The context is canceled when the dynamic configuration is refreshed.
if errors.Is(err, context.Canceled) {
return
}
log.Ctx(ctx).Warn().
Str("targetURL", target.String()).
Str("targetURL", target.targetURL.String()).
Err(err).
Msg("Health check failed.")
@ -128,17 +183,21 @@ func (shc *ServiceHealthChecker) Launch(ctx context.Context) {
serverUpMetricValue = float64(0)
}
shc.balancer.SetStatus(ctx, proxyName, up)
shc.balancer.SetStatus(ctx, target.name, up)
statusStr := runtime.StatusDown
var statusStr string
if up {
statusStr = runtime.StatusUp
shc.healthyTargets <- target
} else {
statusStr = runtime.StatusDown
shc.unhealthyTargets <- target
}
shc.info.UpdateServerStatus(target.String(), statusStr)
shc.info.UpdateServerStatus(target.targetURL.String(), statusStr)
shc.metrics.ServiceServerUpGauge().
With("service", shc.serviceName, "url", target.String()).
With("service", shc.serviceName, "url", target.targetURL.String()).
Set(serverUpMetricValue)
}
}

View file

@ -419,11 +419,12 @@ func TestServiceHealthChecker_Launch(t *testing.T) {
lb := &testLoadBalancer{RWMutex: &sync.RWMutex{}}
config := &dynamic.ServerHealthCheck{
Mode: test.mode,
Status: test.status,
Path: "/path",
Interval: ptypes.Duration(500 * time.Millisecond),
Timeout: ptypes.Duration(499 * time.Millisecond),
Mode: test.mode,
Status: test.status,
Path: "/path",
Interval: ptypes.Duration(500 * time.Millisecond),
UnhealthyInterval: pointer(ptypes.Duration(500 * time.Millisecond)),
Timeout: ptypes.Duration(499 * time.Millisecond),
}
gauge := &testhelpers.CollectingGauge{}
@ -456,3 +457,54 @@ func TestServiceHealthChecker_Launch(t *testing.T) {
})
}
}
// TestDifferentIntervals runs a health checker against one server that always
// answers 200 and one that always answers 503, with an unhealthy interval ten
// times shorter than the regular interval, and lets it run for two seconds.
//
// It then asserts that the load balancer recorded more removals than upserts.
// NOTE(review): this presumes testLoadBalancer counts one removal per "down"
// status and one upsert per "up" status — confirm against its SetStatus.
func TestDifferentIntervals(t *testing.T) {
	healthyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))
	t.Cleanup(healthyServer.Close)
	healthyURL := testhelpers.MustParseURL(healthyServer.URL)

	unhealthyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	t.Cleanup(unhealthyServer.Close)
	unhealthyURL := testhelpers.MustParseURL(unhealthyServer.URL)

	// The context is passed to the health checker and is only canceled when the test ends.
	// Cleanups run last-in first-out, so registering the cancel after the servers
	// stops the health checker before the servers are shut down.
	ctx, cancel := context.WithCancel(context.Background())
	t.Cleanup(cancel)

	lb := &testLoadBalancer{RWMutex: &sync.RWMutex{}}

	config := &dynamic.ServerHealthCheck{
		Mode:              "http",
		Path:              "/path",
		Interval:          ptypes.Duration(500 * time.Millisecond),
		UnhealthyInterval: pointer(ptypes.Duration(50 * time.Millisecond)),
		Timeout:           ptypes.Duration(499 * time.Millisecond),
	}

	gauge := &testhelpers.CollectingGauge{}
	serviceInfo := &runtime.ServiceInfo{}
	hc := NewServiceHealthChecker(ctx, &MetricsMock{gauge}, config, lb, serviceInfo, http.DefaultTransport, map[string]*url.URL{"healthy": healthyURL, "unhealthy": unhealthyURL}, "foobar")

	wg := sync.WaitGroup{}
	wg.Add(1)

	go func() {
		hc.Launch(ctx)
		wg.Done()
	}()

	// Let both loops tick for a while before inspecting the counters.
	select {
	case <-time.After(2 * time.Second):
	case <-ctx.Done():
		wg.Wait()
	}

	lb.Lock()
	defer lb.Unlock()

	assert.Greater(t, lb.numRemovedServers, lb.numUpsertedServers, "removed servers greater than upserted servers")
}

View file

@ -9,6 +9,7 @@ import (
"strings"
"github.com/rs/zerolog/log"
ptypes "github.com/traefik/paerser/types"
"github.com/traefik/traefik/v3/pkg/config/dynamic"
"github.com/traefik/traefik/v3/pkg/logs"
"github.com/traefik/traefik/v3/pkg/provider"
@ -373,6 +374,17 @@ func (c configBuilder) buildServersLB(namespace string, svc traefikv1alpha1.Load
return nil, err
}
}
// If the UnhealthyInterval option is not set, we use the Interval option value,
// to check the unhealthy targets as often as the healthy ones.
if svc.HealthCheck.UnhealthyInterval == nil {
lb.HealthCheck.UnhealthyInterval = &lb.HealthCheck.Interval
} else {
var unhealthyInterval ptypes.Duration
if err := unhealthyInterval.Set(svc.HealthCheck.UnhealthyInterval.String()); err != nil {
return nil, err
}
lb.HealthCheck.UnhealthyInterval = &unhealthyInterval
}
if svc.HealthCheck.Timeout != nil {
if err := lb.HealthCheck.Timeout.Set(svc.HealthCheck.Timeout.String()); err != nil {
return nil, err

View file

@ -2647,10 +2647,11 @@ func TestLoadIngressRoutes(t *testing.T) {
FlushInterval: ptypes.Duration(100 * time.Millisecond),
},
HealthCheck: &dynamic.ServerHealthCheck{
Path: "/health",
Timeout: 5000000000,
Interval: 15000000000,
FollowRedirects: pointer(true),
Path: "/health",
Timeout: 5000000000,
Interval: 15000000000,
UnhealthyInterval: pointer(ptypes.Duration(15000000000)),
FollowRedirects: pointer(true),
},
},
},
@ -2712,10 +2713,11 @@ func TestLoadIngressRoutes(t *testing.T) {
FlushInterval: ptypes.Duration(100 * time.Millisecond),
},
HealthCheck: &dynamic.ServerHealthCheck{
Path: "/health1",
Timeout: 5000000000,
Interval: 15000000000,
FollowRedirects: pointer(true),
Path: "/health1",
Timeout: 5000000000,
Interval: 15000000000,
UnhealthyInterval: pointer(ptypes.Duration(15000000000)),
FollowRedirects: pointer(true),
},
},
},
@ -2732,10 +2734,11 @@ func TestLoadIngressRoutes(t *testing.T) {
FlushInterval: ptypes.Duration(100 * time.Millisecond),
},
HealthCheck: &dynamic.ServerHealthCheck{
Path: "/health2",
Timeout: 5000000000,
Interval: 20000000000,
FollowRedirects: pointer(true),
Path: "/health2",
Timeout: 5000000000,
Interval: 20000000000,
UnhealthyInterval: pointer(ptypes.Duration(20000000000)),
FollowRedirects: pointer(true),
},
},
},
@ -2776,10 +2779,11 @@ func TestLoadIngressRoutes(t *testing.T) {
FlushInterval: ptypes.Duration(100 * time.Millisecond),
},
HealthCheck: &dynamic.ServerHealthCheck{
Path: "/health1",
Timeout: 5000000000,
Interval: 15000000000,
FollowRedirects: pointer(true),
Path: "/health1",
Timeout: 5000000000,
Interval: 15000000000,
UnhealthyInterval: pointer(ptypes.Duration(15000000000)),
FollowRedirects: pointer(true),
},
},
},

View file

@ -170,9 +170,13 @@ type ServerHealthCheck struct {
Status int `json:"status,omitempty"`
// Port defines the server URL port for the health check endpoint.
Port int `json:"port,omitempty"`
// Interval defines the frequency of the health check calls.
// Interval defines the frequency of the health check calls for healthy targets.
// Default: 30s
Interval *intstr.IntOrString `json:"interval,omitempty"`
// UnhealthyInterval defines the frequency of the health check calls for unhealthy targets.
// When UnhealthyInterval is not defined, it defaults to the Interval value.
// Default: 30s
UnhealthyInterval *intstr.IntOrString `json:"unhealthyInterval,omitempty"`
// Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy.
// Default: 5s
Timeout *intstr.IntOrString `json:"timeout,omitempty"`

View file

@ -1280,6 +1280,11 @@ func (in *ServerHealthCheck) DeepCopyInto(out *ServerHealthCheck) {
*out = new(intstr.IntOrString)
**out = **in
}
if in.UnhealthyInterval != nil {
in, out := &in.UnhealthyInterval, &out.UnhealthyInterval
*out = new(intstr.IntOrString)
**out = **in
}
if in.Timeout != nil {
in, out := &in.Timeout, &out.Timeout
*out = new(intstr.IntOrString)

View file

@ -45,6 +45,7 @@ func Test_buildConfiguration(t *testing.T) {
"traefik/http/services/Service01/loadBalancer/healthCheck/path": "foobar",
"traefik/http/services/Service01/loadBalancer/healthCheck/port": "42",
"traefik/http/services/Service01/loadBalancer/healthCheck/interval": "1s",
"traefik/http/services/Service01/loadBalancer/healthCheck/unhealthyinterval": "1s",
"traefik/http/services/Service01/loadBalancer/healthCheck/timeout": "1s",
"traefik/http/services/Service01/loadBalancer/healthCheck/hostname": "foobar",
"traefik/http/services/Service01/loadBalancer/healthCheck/headers/name0": "foobar",
@ -665,14 +666,15 @@ func Test_buildConfiguration(t *testing.T) {
},
},
HealthCheck: &dynamic.ServerHealthCheck{
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Port: 42,
Interval: ptypes.Duration(time.Second),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
FollowRedirects: pointer(true),
Scheme: "foobar",
Mode: "foobar",
Path: "foobar",
Port: 42,
Interval: ptypes.Duration(time.Second),
UnhealthyInterval: pointer(ptypes.Duration(time.Second)),
Timeout: ptypes.Duration(time.Second),
Hostname: "foobar",
FollowRedirects: pointer(true),
Headers: map[string]string{
"name0": "foobar",
"name1": "foobar",