
Add TCP Healthcheck

Douglas De Toni Machado 2025-10-22 06:42:05 -03:00 committed by GitHub
parent d1ab6ed489
commit 8392503df7
37 changed files with 2416 additions and 307 deletions


@@ -1,6 +1,7 @@
package tcp
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
@@ -33,6 +34,7 @@ type ClientConn interface {
// Dialer is an interface to dial a network connection, with support for PROXY protocol and termination delay.
type Dialer interface {
Dial(network, addr string, clientConn ClientConn) (c net.Conn, err error)
DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (c net.Conn, err error)
TerminationDelay() time.Duration
}
@@ -49,7 +51,12 @@ func (d tcpDialer) TerminationDelay() time.Duration {
// Dial dials a network connection and optionally sends a PROXY protocol header.
func (d tcpDialer) Dial(network, addr string, clientConn ClientConn) (net.Conn, error) {
- conn, err := d.dialer.Dial(network, addr)
+ return d.DialContext(context.Background(), network, addr, clientConn)
+ }
+ // DialContext dials a network connection and optionally sends a PROXY protocol header, with context.
+ func (d tcpDialer) DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (net.Conn, error) {
+ conn, err := d.dialer.DialContext(ctx, network, addr)
if err != nil {
return nil, err
}
@@ -72,7 +79,12 @@ type tcpTLSDialer struct {
// Dial dials a network connection with the wrapped tcpDialer and performs a TLS handshake.
func (d tcpTLSDialer) Dial(network, addr string, clientConn ClientConn) (net.Conn, error) {
- conn, err := d.tcpDialer.Dial(network, addr, clientConn)
+ return d.DialContext(context.Background(), network, addr, clientConn)
+ }
+ // DialContext dials a network connection with the wrapped tcpDialer and performs a TLS handshake, with context.
+ func (d tcpTLSDialer) DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (net.Conn, error) {
+ conn, err := d.tcpDialer.DialContext(ctx, network, addr, clientConn)
if err != nil {
return nil, err
}
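
The DialContext variants above thread a caller-supplied context through the dial, so a health-check probe can bound each connection attempt with a deadline. A minimal sketch of that pattern using only the standard library; probeTCP is a hypothetical helper for illustration, not part of this commit:

package probe

import (
	"context"
	"net"
	"time"
)

// probeTCP bounds a TCP health-check dial with a context deadline so the probe
// cannot hang on an unresponsive backend. Hypothetical helper, illustration only.
func probeTCP(addr string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	var d net.Dialer
	conn, err := d.DialContext(ctx, "tcp", addr)
	if err != nil {
		return err
	}
	return conn.Close()
}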


@@ -1,6 +1,7 @@
package tcp
import (
"context"
"errors"
"sync"
@@ -11,30 +12,42 @@ var errNoServersInPool = errors.New("no servers in the pool")
type server struct {
Handler
+ name string
weight int
}
// WRRLoadBalancer is a naive RoundRobin load balancer for TCP services.
type WRRLoadBalancer struct {
- servers []server
- lock sync.Mutex
- currentWeight int
- index int
+ // serversMu is a mutex to protect the handlers slice and the status.
+ serversMu sync.Mutex
+ servers []server
+ // status is a record of which child services of the Balancer are healthy, keyed
+ // by name of child service. A service is initially added to the map when it is
+ // created via Add, and it is later removed or added to the map as needed,
+ // through the SetStatus method.
+ status map[string]struct{}
+ // updaters is the list of hooks that are run (to update the Balancer parent(s)), whenever the Balancer status changes.
+ // No mutex is needed, as it is modified only during the configuration build.
+ updaters []func(bool)
+ index int
+ currentWeight int
+ wantsHealthCheck bool
}
// NewWRRLoadBalancer creates a new WRRLoadBalancer.
- func NewWRRLoadBalancer() *WRRLoadBalancer {
+ func NewWRRLoadBalancer(wantsHealthCheck bool) *WRRLoadBalancer {
return &WRRLoadBalancer{
- index: -1,
+ status: make(map[string]struct{}),
+ index: -1,
+ wantsHealthCheck: wantsHealthCheck,
}
}
// ServeTCP forwards the connection to the right service.
func (b *WRRLoadBalancer) ServeTCP(conn WriteCloser) {
- b.lock.Lock()
- next, err := b.next()
- b.lock.Unlock()
+ next, err := b.nextServer()
if err != nil {
if !errors.Is(err, errNoServersInPool) {
log.Error().Err(err).Msg("Error during load balancing")
@@ -46,22 +59,103 @@ func (b *WRRLoadBalancer) ServeTCP(conn WriteCloser) {
next.ServeTCP(conn)
}
- // AddServer appends a server to the existing list.
- func (b *WRRLoadBalancer) AddServer(serverHandler Handler) {
- w := 1
- b.AddWeightServer(serverHandler, &w)
- }
- // AddWeightServer appends a server to the existing list with a weight.
- func (b *WRRLoadBalancer) AddWeightServer(serverHandler Handler, weight *int) {
- b.lock.Lock()
- defer b.lock.Unlock()
+ // Add appends a server to the existing list with a name and weight.
+ func (b *WRRLoadBalancer) Add(name string, handler Handler, weight *int) {
w := 1
if weight != nil {
w = *weight
}
- b.servers = append(b.servers, server{Handler: serverHandler, weight: w})
+ b.serversMu.Lock()
+ b.servers = append(b.servers, server{Handler: handler, name: name, weight: w})
+ b.status[name] = struct{}{}
+ b.serversMu.Unlock()
}
// SetStatus sets status (UP or DOWN) of a target server.
func (b *WRRLoadBalancer) SetStatus(ctx context.Context, childName string, up bool) {
b.serversMu.Lock()
defer b.serversMu.Unlock()
upBefore := len(b.status) > 0
status := "DOWN"
if up {
status = "UP"
}
log.Ctx(ctx).Debug().Msgf("Setting status of %s to %v", childName, status)
if up {
b.status[childName] = struct{}{}
} else {
delete(b.status, childName)
}
upAfter := len(b.status) > 0
status = "DOWN"
if upAfter {
status = "UP"
}
// No Status Change
if upBefore == upAfter {
// We're still with the same status, no need to propagate
log.Ctx(ctx).Debug().Msgf("Still %s, no need to propagate", status)
return
}
// Status Change
log.Ctx(ctx).Debug().Msgf("Propagating new %s status", status)
for _, fn := range b.updaters {
fn(upAfter)
}
}
func (b *WRRLoadBalancer) RegisterStatusUpdater(fn func(up bool)) error {
if !b.wantsHealthCheck {
return errors.New("healthCheck not enabled in config for this weighted service")
}
b.updaters = append(b.updaters, fn)
return nil
}
func (b *WRRLoadBalancer) nextServer() (Handler, error) {
b.serversMu.Lock()
defer b.serversMu.Unlock()
if len(b.servers) == 0 || len(b.status) == 0 {
return nil, errNoServersInPool
}
// The algo below may look messy, but is actually very simple
// it calculates the GCD and subtracts it on every iteration, what interleaves servers
// and allows us not to build an iterator every time we readjust weights.
// Maximum weight across all enabled servers.
maximum := b.maxWeight()
if maximum == 0 {
return nil, errors.New("all servers have 0 weight")
}
// GCD across all enabled servers
gcd := b.weightGcd()
for {
b.index = (b.index + 1) % len(b.servers)
if b.index == 0 {
b.currentWeight -= gcd
if b.currentWeight <= 0 {
b.currentWeight = maximum
}
}
srv := b.servers[b.index]
if _, ok := b.status[srv.name]; ok && srv.weight >= b.currentWeight {
return srv, nil
}
}
}
func (b *WRRLoadBalancer) maxWeight() int {
@@ -92,36 +186,3 @@ func gcd(a, b int) int {
}
return a
}
- func (b *WRRLoadBalancer) next() (Handler, error) {
- if len(b.servers) == 0 {
- return nil, errNoServersInPool
- }
- // The algo below may look messy, but is actually very simple
- // it calculates the GCD and subtracts it on every iteration, what interleaves servers
- // and allows us not to build an iterator every time we readjust weights
- // Maximum weight across all enabled servers
- maximum := b.maxWeight()
- if maximum == 0 {
- return nil, errors.New("all servers have 0 weight")
- }
- // GCD across all enabled servers
- gcd := b.weightGcd()
- for {
- b.index = (b.index + 1) % len(b.servers)
- if b.index == 0 {
- b.currentWeight -= gcd
- if b.currentWeight <= 0 {
- b.currentWeight = maximum
- }
- }
- srv := b.servers[b.index]
- if srv.weight >= b.currentWeight {
- return srv, nil
- }
- }
- }
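
The removed next and the new nextServer share the same interleaving scheme: the GCD of all weights is subtracted from currentWeight on each full pass over the servers, so a server with weight w qualifies on w/gcd of the max/gcd passes. A self-contained sketch (illustrative names and weights, not part of this commit) that reproduces the selection order for weights A=3 and B=1, ignoring health status:

package main

import "fmt"

func main() {
	type srv struct {
		name   string
		weight int
	}
	// Illustrative pool: A has weight 3, B has weight 1, so gcd = 1 and max = 3.
	servers := []srv{{"A", 3}, {"B", 1}}
	gcd, maximum := 1, 3
	index, currentWeight := -1, 0

	for range 8 { // eight picks, i.e. two full weight cycles
		for {
			index = (index + 1) % len(servers)
			if index == 0 {
				currentWeight -= gcd
				if currentWeight <= 0 {
					currentWeight = maximum
				}
			}
			if servers[index].weight >= currentWeight {
				fmt.Print(servers[index].name, " ")
				break
			}
		}
	}
}

Run as-is this prints A A A B A A A B: a 3:1 split with the lower-weight server interleaved rather than appended at the end of each cycle.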


@@ -9,50 +9,7 @@ import (
"github.com/stretchr/testify/require"
)
- type fakeConn struct {
- writeCall map[string]int
- closeCall int
- }
- func (f *fakeConn) Read(b []byte) (n int, err error) {
- panic("implement me")
- }
- func (f *fakeConn) Write(b []byte) (n int, err error) {
- f.writeCall[string(b)]++
- return len(b), nil
- }
- func (f *fakeConn) Close() error {
- f.closeCall++
- return nil
- }
- func (f *fakeConn) LocalAddr() net.Addr {
- panic("implement me")
- }
- func (f *fakeConn) RemoteAddr() net.Addr {
- panic("implement me")
- }
- func (f *fakeConn) SetDeadline(t time.Time) error {
- panic("implement me")
- }
- func (f *fakeConn) SetReadDeadline(t time.Time) error {
- panic("implement me")
- }
- func (f *fakeConn) SetWriteDeadline(t time.Time) error {
- panic("implement me")
- }
- func (f *fakeConn) CloseWrite() error {
- panic("implement me")
- }
- func TestLoadBalancing(t *testing.T) {
+ func TestWRRLoadBalancer_LoadBalancing(t *testing.T) {
testCases := []struct {
desc string
serversWeight map[string]int
@@ -124,9 +81,9 @@ func TestLoadBalancing(t *testing.T) {
t.Run(test.desc, func(t *testing.T) {
t.Parallel()
- balancer := NewWRRLoadBalancer()
+ balancer := NewWRRLoadBalancer(false)
for server, weight := range test.serversWeight {
- balancer.AddWeightServer(HandlerFunc(func(conn WriteCloser) {
+ balancer.Add(server, HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte(server))
require.NoError(t, err)
}), &weight)
@@ -142,3 +99,196 @@ func TestLoadBalancing(t *testing.T) {
})
}
}
func TestWRRLoadBalancer_NoServiceUp(t *testing.T) {
balancer := NewWRRLoadBalancer(false)
balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("first"))
require.NoError(t, err)
}), pointer(1))
balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("second"))
require.NoError(t, err)
}), pointer(1))
balancer.SetStatus(t.Context(), "first", false)
balancer.SetStatus(t.Context(), "second", false)
conn := &fakeConn{writeCall: make(map[string]int)}
balancer.ServeTCP(conn)
assert.Empty(t, conn.writeCall)
assert.Equal(t, 1, conn.closeCall)
}
func TestWRRLoadBalancer_OneServerDown(t *testing.T) {
balancer := NewWRRLoadBalancer(false)
balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("first"))
require.NoError(t, err)
}), pointer(1))
balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("second"))
require.NoError(t, err)
}), pointer(1))
balancer.SetStatus(t.Context(), "second", false)
conn := &fakeConn{writeCall: make(map[string]int)}
for range 3 {
balancer.ServeTCP(conn)
}
assert.Equal(t, 3, conn.writeCall["first"])
}
func TestWRRLoadBalancer_DownThenUp(t *testing.T) {
balancer := NewWRRLoadBalancer(false)
balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("first"))
require.NoError(t, err)
}), pointer(1))
balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("second"))
require.NoError(t, err)
}), pointer(1))
balancer.SetStatus(t.Context(), "second", false)
conn := &fakeConn{writeCall: make(map[string]int)}
for range 3 {
balancer.ServeTCP(conn)
}
assert.Equal(t, 3, conn.writeCall["first"])
balancer.SetStatus(t.Context(), "second", true)
conn = &fakeConn{writeCall: make(map[string]int)}
for range 2 {
balancer.ServeTCP(conn)
}
assert.Equal(t, 1, conn.writeCall["first"])
assert.Equal(t, 1, conn.writeCall["second"])
}
func TestWRRLoadBalancer_Propagate(t *testing.T) {
balancer1 := NewWRRLoadBalancer(true)
balancer1.Add("first", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("first"))
require.NoError(t, err)
}), pointer(1))
balancer1.Add("second", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("second"))
require.NoError(t, err)
}), pointer(1))
balancer2 := NewWRRLoadBalancer(true)
balancer2.Add("third", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("third"))
require.NoError(t, err)
}), pointer(1))
balancer2.Add("fourth", HandlerFunc(func(conn WriteCloser) {
_, err := conn.Write([]byte("fourth"))
require.NoError(t, err)
}), pointer(1))
topBalancer := NewWRRLoadBalancer(true)
topBalancer.Add("balancer1", balancer1, pointer(1))
_ = balancer1.RegisterStatusUpdater(func(up bool) {
topBalancer.SetStatus(t.Context(), "balancer1", up)
})
topBalancer.Add("balancer2", balancer2, pointer(1))
_ = balancer2.RegisterStatusUpdater(func(up bool) {
topBalancer.SetStatus(t.Context(), "balancer2", up)
})
conn := &fakeConn{writeCall: make(map[string]int)}
for range 8 {
topBalancer.ServeTCP(conn)
}
assert.Equal(t, 2, conn.writeCall["first"])
assert.Equal(t, 2, conn.writeCall["second"])
assert.Equal(t, 2, conn.writeCall["third"])
assert.Equal(t, 2, conn.writeCall["fourth"])
// fourth gets downed, but balancer2 still up since third is still up.
balancer2.SetStatus(t.Context(), "fourth", false)
conn = &fakeConn{writeCall: make(map[string]int)}
for range 8 {
topBalancer.ServeTCP(conn)
}
assert.Equal(t, 2, conn.writeCall["first"])
assert.Equal(t, 2, conn.writeCall["second"])
assert.Equal(t, 4, conn.writeCall["third"])
assert.Equal(t, 0, conn.writeCall["fourth"])
// third gets downed, and the propagation triggers balancer2 to be marked as
// down as well for topBalancer.
balancer2.SetStatus(t.Context(), "third", false)
conn = &fakeConn{writeCall: make(map[string]int)}
for range 8 {
topBalancer.ServeTCP(conn)
}
assert.Equal(t, 4, conn.writeCall["first"])
assert.Equal(t, 4, conn.writeCall["second"])
assert.Equal(t, 0, conn.writeCall["third"])
assert.Equal(t, 0, conn.writeCall["fourth"])
}
func pointer[T any](v T) *T { return &v }
type fakeConn struct {
writeCall map[string]int
closeCall int
}
func (f *fakeConn) Read(b []byte) (n int, err error) {
panic("implement me")
}
func (f *fakeConn) Write(b []byte) (n int, err error) {
f.writeCall[string(b)]++
return len(b), nil
}
func (f *fakeConn) Close() error {
f.closeCall++
return nil
}
func (f *fakeConn) LocalAddr() net.Addr {
panic("implement me")
}
func (f *fakeConn) RemoteAddr() net.Addr {
panic("implement me")
}
func (f *fakeConn) SetDeadline(t time.Time) error {
panic("implement me")
}
func (f *fakeConn) SetReadDeadline(t time.Time) error {
panic("implement me")
}
func (f *fakeConn) SetWriteDeadline(t time.Time) error {
panic("implement me")
}
func (f *fakeConn) CloseWrite() error {
panic("implement me")
}
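
The tests above call SetStatus directly; in a running proxy a health checker would do this after probing each backend. A hypothetical probe loop, not this commit's (or Traefik's) actual health-check wiring, showing how a context-bounded dial and SetStatus could fit together; the local interface, function name, and intervals are assumptions for illustration only:

package healthcheck

import (
	"context"
	"net"
	"time"
)

// statusSetter mirrors the SetStatus method added to WRRLoadBalancer in this
// commit; it is declared locally only to keep the sketch self-contained.
type statusSetter interface {
	SetStatus(ctx context.Context, childName string, up bool)
}

// watch is a hypothetical probe loop: on every tick it dials the backend with
// a per-attempt deadline and reports the result to the balancer.
func watch(ctx context.Context, lb statusSetter, name, addr string, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
			var d net.Dialer
			conn, err := d.DialContext(dialCtx, "tcp", addr)
			cancel()
			if err == nil {
				_ = conn.Close()
			}
			lb.SetStatus(ctx, name, err == nil)
		}
	}
}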