1
0
Fork 0

Add TCP Healthcheck

This commit is contained in:
Douglas De Toni Machado 2025-10-22 06:42:05 -03:00 committed by GitHub
parent d1ab6ed489
commit 8392503df7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 2416 additions and 307 deletions

View file

@ -39,6 +39,7 @@ type Balancer struct {
status map[string]struct{}
// updaters is the list of hooks that are run (to update the Balancer
// parent(s)), whenever the Balancer status changes.
// No mutex is needed, as it is modified only during the configuration build.
updaters []func(bool)
// fenced is the list of terminating yet still serving child services.
fenced map[string]struct{}

View file

@ -56,6 +56,7 @@ type Balancer struct {
// updaters is the list of hooks that are run (to update the Balancer
// parent(s)), whenever the Balancer status changes.
// No mutex is needed, as it is modified only during the configuration build.
updaters []func(bool)
sticky *loadbalancer.Sticky

View file

@ -40,6 +40,7 @@ type Balancer struct {
// updaters is the list of hooks that are run (to update the Balancer
// parent(s)), whenever the Balancer status changes.
// No mutex is needed, as it is modified only during the configuration build.
updaters []func(bool)
sticky *loadbalancer.Sticky

View file

@ -266,19 +266,18 @@ func (m *Manager) getWRRServiceHandler(ctx context.Context, serviceName string,
continue
}
childName := service.Name
updater, ok := serviceHandler.(healthcheck.StatusUpdater)
if !ok {
return nil, fmt.Errorf("child service %v of %v not a healthcheck.StatusUpdater (%T)", childName, serviceName, serviceHandler)
return nil, fmt.Errorf("child service %v of %v not a healthcheck.StatusUpdater (%T)", service.Name, serviceName, serviceHandler)
}
if err := updater.RegisterStatusUpdater(func(up bool) {
balancer.SetStatus(ctx, childName, up)
balancer.SetStatus(ctx, service.Name, up)
}); err != nil {
return nil, fmt.Errorf("cannot register %v as updater for %v: %w", childName, serviceName, err)
return nil, fmt.Errorf("cannot register %v as updater for %v: %w", service.Name, serviceName, err)
}
log.Ctx(ctx).Debug().Str("parent", serviceName).Str("child", childName).
log.Ctx(ctx).Debug().Str("parent", serviceName).Str("child", service.Name).
Msg("Child service will update parent on status change")
}
@ -342,19 +341,18 @@ func (m *Manager) getHRWServiceHandler(ctx context.Context, serviceName string,
continue
}
childName := service.Name
updater, ok := serviceHandler.(healthcheck.StatusUpdater)
if !ok {
return nil, fmt.Errorf("child service %v of %v not a healthcheck.StatusUpdater (%T)", childName, serviceName, serviceHandler)
return nil, fmt.Errorf("child service %v of %v not a healthcheck.StatusUpdater (%T)", service.Name, serviceName, serviceHandler)
}
if err := updater.RegisterStatusUpdater(func(up bool) {
balancer.SetStatus(ctx, childName, up)
balancer.SetStatus(ctx, service.Name, up)
}); err != nil {
return nil, fmt.Errorf("cannot register %v as updater for %v: %w", childName, serviceName, err)
return nil, fmt.Errorf("cannot register %v as updater for %v: %w", service.Name, serviceName, err)
}
log.Ctx(ctx).Debug().Str("parent", serviceName).Str("child", childName).
log.Ctx(ctx).Debug().Str("parent", serviceName).Str("child", service.Name).
Msg("Child service will update parent on status change")
}
@ -466,7 +464,7 @@ func (m *Manager) getLoadBalancerServiceHandler(ctx context.Context, serviceName
lb.AddServer(server.URL, proxy, server)
// servers are considered UP by default.
// Servers are considered UP by default.
info.UpdateServerStatus(target.String(), runtime.StatusUp)
healthCheckTargets[server.URL] = target

View file

@ -4,12 +4,15 @@ import (
"context"
"errors"
"fmt"
"maps"
"math/rand"
"net"
"slices"
"time"
"github.com/rs/zerolog/log"
"github.com/traefik/traefik/v3/pkg/config/runtime"
"github.com/traefik/traefik/v3/pkg/healthcheck"
"github.com/traefik/traefik/v3/pkg/observability/logs"
"github.com/traefik/traefik/v3/pkg/server/provider"
"github.com/traefik/traefik/v3/pkg/tcp"
@ -17,17 +20,19 @@ import (
// Manager is the TCPHandlers factory.
type Manager struct {
dialerManager *tcp.DialerManager
configs map[string]*runtime.TCPServiceInfo
rand *rand.Rand // For the initial shuffling of load-balancers.
dialerManager *tcp.DialerManager
configs map[string]*runtime.TCPServiceInfo
rand *rand.Rand // For the initial shuffling of load-balancers.
healthCheckers map[string]*healthcheck.ServiceTCPHealthChecker
}
// NewManager creates a new manager.
func NewManager(conf *runtime.Configuration, dialerManager *tcp.DialerManager) *Manager {
return &Manager{
dialerManager: dialerManager,
configs: conf.TCPServices,
rand: rand.New(rand.NewSource(time.Now().UnixNano())),
dialerManager: dialerManager,
healthCheckers: make(map[string]*healthcheck.ServiceTCPHealthChecker),
configs: conf.TCPServices,
rand: rand.New(rand.NewSource(time.Now().UnixNano())),
}
}
@ -51,7 +56,7 @@ func (m *Manager) BuildTCP(rootCtx context.Context, serviceName string) (tcp.Han
switch {
case conf.LoadBalancer != nil:
loadBalancer := tcp.NewWRRLoadBalancer()
loadBalancer := tcp.NewWRRLoadBalancer(conf.LoadBalancer.HealthCheck != nil)
if conf.LoadBalancer.TerminationDelay != nil {
log.Ctx(ctx).Warn().Msgf("Service %q load balancer uses `TerminationDelay`, but this option is deprecated, please use ServersTransport configuration instead.", serviceName)
@ -65,6 +70,8 @@ func (m *Manager) BuildTCP(rootCtx context.Context, serviceName string) (tcp.Han
conf.LoadBalancer.ServersTransport = provider.GetQualifiedName(ctx, conf.LoadBalancer.ServersTransport)
}
uniqHealthCheckTargets := make(map[string]healthcheck.TCPHealthCheckTarget, len(conf.LoadBalancer.Servers))
for index, server := range shuffle(conf.LoadBalancer.Servers, m.rand) {
srvLogger := logger.With().
Int(logs.ServerIndex, index).
@ -86,14 +93,34 @@ func (m *Manager) BuildTCP(rootCtx context.Context, serviceName string) (tcp.Han
continue
}
loadBalancer.AddServer(handler)
loadBalancer.Add(server.Address, handler, nil)
// Servers are considered UP by default.
conf.UpdateServerStatus(server.Address, runtime.StatusUp)
uniqHealthCheckTargets[server.Address] = healthcheck.TCPHealthCheckTarget{
Address: server.Address,
TLS: server.TLS,
Dialer: dialer,
}
logger.Debug().Msg("Creating TCP server")
}
if conf.LoadBalancer.HealthCheck != nil {
m.healthCheckers[serviceName] = healthcheck.NewServiceTCPHealthChecker(
ctx,
conf.LoadBalancer.HealthCheck,
loadBalancer,
conf,
slices.Collect(maps.Values(uniqHealthCheckTargets)),
serviceQualifiedName)
}
return loadBalancer, nil
case conf.Weighted != nil:
loadBalancer := tcp.NewWRRLoadBalancer()
loadBalancer := tcp.NewWRRLoadBalancer(conf.Weighted.HealthCheck != nil)
for _, service := range shuffle(conf.Weighted.Services, m.rand) {
handler, err := m.BuildTCP(ctx, service.Name)
@ -102,7 +129,25 @@ func (m *Manager) BuildTCP(rootCtx context.Context, serviceName string) (tcp.Han
return nil, err
}
loadBalancer.AddWeightServer(handler, service.Weight)
loadBalancer.Add(service.Name, handler, service.Weight)
if conf.Weighted.HealthCheck == nil {
continue
}
updater, ok := handler.(healthcheck.StatusUpdater)
if !ok {
return nil, fmt.Errorf("child service %v of %v not a healthcheck.StatusUpdater (%T)", service.Name, serviceName, handler)
}
if err := updater.RegisterStatusUpdater(func(up bool) {
loadBalancer.SetStatus(ctx, service.Name, up)
}); err != nil {
return nil, fmt.Errorf("cannot register %v as updater for %v: %w", service.Name, serviceName, err)
}
log.Ctx(ctx).Debug().Str("parent", serviceName).Str("child", service.Name).
Msg("Child service will update parent on status change")
}
return loadBalancer, nil
@ -114,6 +159,14 @@ func (m *Manager) BuildTCP(rootCtx context.Context, serviceName string) (tcp.Han
}
}
// LaunchHealthCheck starts every health checker registered on the Manager.
// Each checker is launched in its own goroutine and receives a context
// derived from ctx that carries a logger tagged with the service name.
func (m *Manager) LaunchHealthCheck(ctx context.Context) {
	for name, checker := range m.healthCheckers {
		// Tag the logger with the owning service before attaching it to the context.
		svcLogger := log.Ctx(ctx).With().Str(logs.ServiceName, name).Logger()
		checkCtx := svcLogger.WithContext(ctx)

		go checker.Launch(checkCtx)
	}
}
func shuffle[T any](values []T, r *rand.Rand) []T {
shuffled := make([]T, len(values))
copy(shuffled, values)

View file

@ -233,6 +233,49 @@ func TestManager_BuildTCP(t *testing.T) {
providerName: "provider-1",
expectedError: "no transport configuration found for \"myServersTransport@provider-1\"",
},
{
desc: "WRR with healthcheck enabled",
stConfigs: map[string]*dynamic.TCPServersTransport{"default@internal": {}},
serviceName: "serviceName",
configs: map[string]*runtime.TCPServiceInfo{
"serviceName@provider-1": {
TCPService: &dynamic.TCPService{
Weighted: &dynamic.TCPWeightedRoundRobin{
Services: []dynamic.TCPWRRService{
{Name: "foobar@provider-1", Weight: new(int)},
{Name: "foobar2@provider-1", Weight: new(int)},
},
HealthCheck: &dynamic.HealthCheck{},
},
},
},
"foobar@provider-1": {
TCPService: &dynamic.TCPService{
LoadBalancer: &dynamic.TCPServersLoadBalancer{
Servers: []dynamic.TCPServer{
{
Address: "192.168.0.12:80",
},
},
HealthCheck: &dynamic.TCPServerHealthCheck{},
},
},
},
"foobar2@provider-1": {
TCPService: &dynamic.TCPService{
LoadBalancer: &dynamic.TCPServersLoadBalancer{
Servers: []dynamic.TCPServer{
{
Address: "192.168.0.13:80",
},
},
HealthCheck: &dynamic.TCPServerHealthCheck{},
},
},
},
},
providerName: "provider-1",
},
}
for _, test := range testCases {