1
0
Fork 0

healthcheck: add support at the load-balancers of services level

Co-authored-by: Dmitry Sharshakov <d3dx12.xx@gmail.com>
Co-authored-by: Julien Salleyron <julien.salleyron@gmail.com>
Co-authored-by: Jean-Baptiste Doumenjou <925513+jbdoumenjou@users.noreply.github.com>
Co-authored-by: Romain <rtribotte@users.noreply.github.com>
Co-authored-by: Tom Moulard <tom.moulard@traefik.io>
This commit is contained in:
mpl 2021-06-25 21:08:11 +02:00 committed by GitHub
parent 5e3e47b484
commit 838a8e18d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 1196 additions and 120 deletions

View file

@ -2,6 +2,7 @@ package healthcheck
import (
"context"
"errors"
"fmt"
"net"
"net/http"
@ -11,6 +12,7 @@ import (
"time"
gokitmetrics "github.com/go-kit/kit/metrics"
"github.com/traefik/traefik/v2/pkg/config/dynamic"
"github.com/traefik/traefik/v2/pkg/config/runtime"
"github.com/traefik/traefik/v2/pkg/log"
"github.com/traefik/traefik/v2/pkg/metrics"
@ -41,6 +43,13 @@ type BalancerHandler interface {
Balancer
}
// BalancerStatusHandler is an http Handler that does load-balancing,
// andupdates its parents of its status.
type BalancerStatusHandler interface {
BalancerHandler
StatusUpdater
}
type metricsHealthcheck struct {
serverUpGauge gokitmetrics.Gauge
}
@ -130,9 +139,10 @@ func (hc *HealthCheck) SetBackendsConfiguration(parentCtx context.Context, backe
func (hc *HealthCheck) execute(ctx context.Context, backend *BackendConfig) {
logger := log.FromContext(ctx)
logger.Debugf("Initial health check for backend: %q", backend.name)
hc.checkBackend(ctx, backend)
logger.Debugf("Initial health check for backend: %q", backend.name)
hc.checkServersLB(ctx, backend)
ticker := time.NewTicker(backend.Interval)
defer ticker.Stop()
for {
@ -141,13 +151,13 @@ func (hc *HealthCheck) execute(ctx context.Context, backend *BackendConfig) {
logger.Debugf("Stopping current health check goroutines of backend: %s", backend.name)
return
case <-ticker.C:
logger.Debugf("Refreshing health check for backend: %s", backend.name)
hc.checkBackend(ctx, backend)
logger.Debugf("Routine health check refresh for backend: %s", backend.name)
hc.checkServersLB(ctx, backend)
}
}
}
func (hc *HealthCheck) checkBackend(ctx context.Context, backend *BackendConfig) {
func (hc *HealthCheck) checkServersLB(ctx context.Context, backend *BackendConfig) {
logger := log.FromContext(ctx)
enabledURLs := backend.LB.Servers()
@ -157,12 +167,11 @@ func (hc *HealthCheck) checkBackend(ctx context.Context, backend *BackendConfig)
serverUpMetricValue := float64(0)
if err := checkHealth(disabledURL.url, backend); err == nil {
logger.Warnf("Health check up: Returning to server list. Backend: %q URL: %q Weight: %d",
logger.Warnf("Health check up: returning to server list. Backend: %q URL: %q Weight: %d",
backend.name, disabledURL.url.String(), disabledURL.weight)
if err = backend.LB.UpsertServer(disabledURL.url, roundrobin.Weight(disabledURL.weight)); err != nil {
logger.Error(err)
}
serverUpMetricValue = 1
} else {
logger.Warnf("Health check still failing. Backend: %q URL: %q Reason: %s", backend.name, disabledURL.url.String(), err)
@ -175,31 +184,31 @@ func (hc *HealthCheck) checkBackend(ctx context.Context, backend *BackendConfig)
backend.disabledURLs = newDisabledURLs
for _, enableURL := range enabledURLs {
for _, enabledURL := range enabledURLs {
serverUpMetricValue := float64(1)
if err := checkHealth(enableURL, backend); err != nil {
if err := checkHealth(enabledURL, backend); err != nil {
weight := 1
rr, ok := backend.LB.(*roundrobin.RoundRobin)
if ok {
var gotWeight bool
weight, gotWeight = rr.ServerWeight(enableURL)
weight, gotWeight = rr.ServerWeight(enabledURL)
if !gotWeight {
weight = 1
}
}
logger.Warnf("Health check failed, removing from server list. Backend: %q URL: %q Weight: %d Reason: %s",
backend.name, enableURL.String(), weight, err)
if err := backend.LB.RemoveServer(enableURL); err != nil {
backend.name, enabledURL.String(), weight, err)
if err := backend.LB.RemoveServer(enabledURL); err != nil {
logger.Error(err)
}
backend.disabledURLs = append(backend.disabledURLs, backendURL{enableURL, weight})
backend.disabledURLs = append(backend.disabledURLs, backendURL{enabledURL, weight})
serverUpMetricValue = 0
}
labelValues := []string{"service", backend.name, "url", enableURL.String()}
labelValues := []string{"service", backend.name, "url", enabledURL.String()}
hc.metrics.serverUpGauge.With(labelValues...).Set(serverUpMetricValue)
}
}
@ -264,11 +273,19 @@ func checkHealth(serverURL *url.URL, backend *BackendConfig) error {
return nil
}
// StatusUpdater should be implemented by a service that, when its status
// changes (e.g. all if its children are down), needs to propagate upwards (to
// their parent(s)) that change.
type StatusUpdater interface {
RegisterStatusUpdater(fn func(up bool)) error
}
// NewLBStatusUpdater returns a new LbStatusUpdater.
func NewLBStatusUpdater(bh BalancerHandler, info *runtime.ServiceInfo) *LbStatusUpdater {
func NewLBStatusUpdater(bh BalancerHandler, info *runtime.ServiceInfo, hc *dynamic.ServerHealthCheck) *LbStatusUpdater {
return &LbStatusUpdater{
BalancerHandler: bh,
serviceInfo: info,
BalancerHandler: bh,
serviceInfo: info,
wantsHealthCheck: hc != nil,
}
}
@ -276,27 +293,83 @@ func NewLBStatusUpdater(bh BalancerHandler, info *runtime.ServiceInfo) *LbStatus
// so it can keep track of the status of a server in the ServiceInfo.
type LbStatusUpdater struct {
BalancerHandler
serviceInfo *runtime.ServiceInfo // can be nil
serviceInfo *runtime.ServiceInfo // can be nil
updaters []func(up bool)
wantsHealthCheck bool
}
// RegisterStatusUpdater adds fn to the list of hooks that are run when the
// status of the Balancer changes.
// Not thread safe.
func (lb *LbStatusUpdater) RegisterStatusUpdater(fn func(up bool)) error {
if !lb.wantsHealthCheck {
return errors.New("healthCheck not enabled in config for this loadbalancer service")
}
lb.updaters = append(lb.updaters, fn)
return nil
}
// RemoveServer removes the given server from the BalancerHandler,
// and updates the status of the server to "DOWN".
func (lb *LbStatusUpdater) RemoveServer(u *url.URL) error {
// TODO(mpl): when we have the freedom to change the signature of RemoveServer
// (kinda stuck because of oxy for now), let's pass around a context to improve
// logging.
ctx := context.TODO()
upBefore := len(lb.BalancerHandler.Servers()) > 0
err := lb.BalancerHandler.RemoveServer(u)
if err == nil && lb.serviceInfo != nil {
if err != nil {
return err
}
if lb.serviceInfo != nil {
lb.serviceInfo.UpdateServerStatus(u.String(), serverDown)
}
return err
log.FromContext(ctx).Debugf("child %s now %s", u.String(), serverDown)
if !upBefore {
// we were already down, and we still are, no need to propagate.
log.FromContext(ctx).Debugf("Still %s, no need to propagate", serverDown)
return nil
}
if len(lb.BalancerHandler.Servers()) > 0 {
// we were up, and we still are, no need to propagate
log.FromContext(ctx).Debugf("Still %s, no need to propagate", serverUp)
return nil
}
log.FromContext(ctx).Debugf("Propagating new %s status", serverDown)
for _, fn := range lb.updaters {
fn(false)
}
return nil
}
// UpsertServer adds the given server to the BalancerHandler,
// and updates the status of the server to "UP".
func (lb *LbStatusUpdater) UpsertServer(u *url.URL, options ...roundrobin.ServerOption) error {
ctx := context.TODO()
upBefore := len(lb.BalancerHandler.Servers()) > 0
err := lb.BalancerHandler.UpsertServer(u, options...)
if err == nil && lb.serviceInfo != nil {
if err != nil {
return err
}
if lb.serviceInfo != nil {
lb.serviceInfo.UpdateServerStatus(u.String(), serverUp)
}
return err
log.FromContext(ctx).Debugf("child %s now %s", u.String(), serverUp)
if upBefore {
// we were up, and we still are, no need to propagate
log.FromContext(ctx).Debugf("Still %s, no need to propagate", serverUp)
return nil
}
log.FromContext(ctx).Debugf("Propagating new %s status", serverUp)
for _, fn := range lb.updaters {
fn(true)
}
return nil
}
// Balancers is a list of Balancers(s) that implements the Balancer interface.

View file

@ -445,7 +445,7 @@ func (th *testHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
func TestLBStatusUpdater(t *testing.T) {
lb := &testLoadBalancer{RWMutex: &sync.RWMutex{}}
svInfo := &runtime.ServiceInfo{}
lbsu := NewLBStatusUpdater(lb, svInfo)
lbsu := NewLBStatusUpdater(lb, svInfo, nil)
newServer, err := url.Parse("http://foo.com")
assert.NoError(t, err)
err = lbsu.UpsertServer(newServer, roundrobin.Weight(1))