Add TCP Healthcheck

parent d1ab6ed489
commit 8392503df7

37 changed files with 2416 additions and 307 deletions
@@ -1,6 +1,7 @@
 package tcp
 
 import (
+	"context"
 	"crypto/tls"
 	"crypto/x509"
 	"errors"
@@ -33,6 +34,7 @@ type ClientConn interface {
 // Dialer is an interface to dial a network connection, with support for PROXY protocol and termination delay.
 type Dialer interface {
 	Dial(network, addr string, clientConn ClientConn) (c net.Conn, err error)
+	DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (c net.Conn, err error)
 	TerminationDelay() time.Duration
 }
 
@@ -49,7 +51,12 @@ func (d tcpDialer) TerminationDelay() time.Duration {
 
 // Dial dials a network connection and optionally sends a PROXY protocol header.
 func (d tcpDialer) Dial(network, addr string, clientConn ClientConn) (net.Conn, error) {
-	conn, err := d.dialer.Dial(network, addr)
+	return d.DialContext(context.Background(), network, addr, clientConn)
+}
+
+// DialContext dials a network connection and optionally sends a PROXY protocol header, with context.
+func (d tcpDialer) DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (net.Conn, error) {
+	conn, err := d.dialer.DialContext(ctx, network, addr)
 	if err != nil {
 		return nil, err
 	}
@@ -72,7 +79,12 @@ type tcpTLSDialer struct {
 
 // Dial dials a network connection with the wrapped tcpDialer and performs a TLS handshake.
 func (d tcpTLSDialer) Dial(network, addr string, clientConn ClientConn) (net.Conn, error) {
-	conn, err := d.tcpDialer.Dial(network, addr, clientConn)
+	return d.DialContext(context.Background(), network, addr, clientConn)
+}
+
+// DialContext dials a network connection with the wrapped tcpDialer and performs a TLS handshake, with context.
+func (d tcpTLSDialer) DialContext(ctx context.Context, network, addr string, clientConn ClientConn) (net.Conn, error) {
+	conn, err := d.tcpDialer.DialContext(ctx, network, addr, clientConn)
 	if err != nil {
 		return nil, err
 	}
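Both dialers follow the usual Go convention: Dial is kept for compatibility and simply delegates to DialContext with context.Background(), so only the context-aware path does real work. For the health checks this commit adds, that means a probe can bound the dial with a deadline. A minimal, self-contained sketch of that pattern (probeTCP is an illustrative helper, not part of the commit):

package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

// probeTCP reports whether a TCP connection to addr can be established within
// timeout. The dial is bounded by the context deadline instead of blocking
// indefinitely.
func probeTCP(addr string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	var d net.Dialer
	conn, err := d.DialContext(ctx, "tcp", addr)
	if err != nil {
		return err // DOWN: refused, unreachable, or deadline exceeded.
	}
	return conn.Close() // UP: the TCP handshake succeeded.
}

func main() {
	fmt.Println(probeTCP("127.0.0.1:80", 2*time.Second))
}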
@@ -1,6 +1,7 @@
 package tcp
 
 import (
+	"context"
 	"errors"
 	"sync"
 
@@ -11,30 +12,42 @@ var errNoServersInPool = errors.New("no servers in the pool")
 
 type server struct {
 	Handler
+	name   string
 	weight int
 }
 
 // WRRLoadBalancer is a naive RoundRobin load balancer for TCP services.
 type WRRLoadBalancer struct {
-	servers       []server
-	lock          sync.Mutex
-	currentWeight int
-	index         int
+	// serversMu is a mutex to protect the handlers slice and the status.
+	serversMu sync.Mutex
+	servers   []server
+	// status is a record of which child services of the Balancer are healthy, keyed
+	// by name of child service. A service is initially added to the map when it is
+	// created via Add, and it is later removed or added to the map as needed,
+	// through the SetStatus method.
+	status map[string]struct{}
+
+	// updaters is the list of hooks that are run (to update the Balancer parent(s)), whenever the Balancer status changes.
+	// No mutex is needed, as it is modified only during the configuration build.
+	updaters []func(bool)
+
+	index            int
+	currentWeight    int
+	wantsHealthCheck bool
 }
 
 // NewWRRLoadBalancer creates a new WRRLoadBalancer.
-func NewWRRLoadBalancer() *WRRLoadBalancer {
+func NewWRRLoadBalancer(wantsHealthCheck bool) *WRRLoadBalancer {
 	return &WRRLoadBalancer{
-		index: -1,
+		status:           make(map[string]struct{}),
+		index:            -1,
+		wantsHealthCheck: wantsHealthCheck,
 	}
 }
 
 // ServeTCP forwards the connection to the right service.
 func (b *WRRLoadBalancer) ServeTCP(conn WriteCloser) {
-	b.lock.Lock()
-	next, err := b.next()
-	b.lock.Unlock()
-
+	next, err := b.nextServer()
 	if err != nil {
 		if !errors.Is(err, errNoServersInPool) {
 			log.Error().Err(err).Msg("Error during load balancing")
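The new fields encode the health model: a child is healthy while its name is present in status, the balancer as a whole is UP while status is non-empty, and updaters lets a parent subscribe to UP/DOWN transitions. A stripped-down sketch of that aggregation (simplified type, not the commit's actual code):

package main

import "fmt"

// balancer is a stand-in for WRRLoadBalancer, keeping only the status map
// and the updater hooks.
type balancer struct {
	status   map[string]struct{}
	updaters []func(bool)
}

// SetStatus mirrors the commit's logic: mutate the status map, then notify
// the updaters only when the overall UP/DOWN state actually flips.
func (b *balancer) SetStatus(name string, up bool) {
	upBefore := len(b.status) > 0

	if up {
		b.status[name] = struct{}{}
	} else {
		delete(b.status, name)
	}

	upAfter := len(b.status) > 0
	if upBefore != upAfter {
		for _, fn := range b.updaters {
			fn(upAfter)
		}
	}
}

func main() {
	b := &balancer{status: map[string]struct{}{"a": {}, "b": {}}}
	b.updaters = append(b.updaters, func(up bool) { fmt.Println("parent notified:", up) })

	b.SetStatus("a", false) // still UP through "b": nothing printed
	b.SetStatus("b", false) // last healthy child removed: prints "parent notified: false"
	b.SetStatus("a", true)  // a child is back: prints "parent notified: true"
}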
@@ -46,22 +59,103 @@ func (b *WRRLoadBalancer) ServeTCP(conn WriteCloser) {
 	next.ServeTCP(conn)
 }
 
-// AddServer appends a server to the existing list.
-func (b *WRRLoadBalancer) AddServer(serverHandler Handler) {
-	w := 1
-	b.AddWeightServer(serverHandler, &w)
-}
-
-// AddWeightServer appends a server to the existing list with a weight.
-func (b *WRRLoadBalancer) AddWeightServer(serverHandler Handler, weight *int) {
-	b.lock.Lock()
-	defer b.lock.Unlock()
-
+// Add appends a server to the existing list with a name and weight.
+func (b *WRRLoadBalancer) Add(name string, handler Handler, weight *int) {
 	w := 1
 	if weight != nil {
 		w = *weight
 	}
-	b.servers = append(b.servers, server{Handler: serverHandler, weight: w})
+
+	b.serversMu.Lock()
+	b.servers = append(b.servers, server{Handler: handler, name: name, weight: w})
+	b.status[name] = struct{}{}
+	b.serversMu.Unlock()
 }
 
+// SetStatus sets status (UP or DOWN) of a target server.
+func (b *WRRLoadBalancer) SetStatus(ctx context.Context, childName string, up bool) {
+	b.serversMu.Lock()
+	defer b.serversMu.Unlock()
+
+	upBefore := len(b.status) > 0
+
+	status := "DOWN"
+	if up {
+		status = "UP"
+	}
+
+	log.Ctx(ctx).Debug().Msgf("Setting status of %s to %v", childName, status)
+
+	if up {
+		b.status[childName] = struct{}{}
+	} else {
+		delete(b.status, childName)
+	}
+
+	upAfter := len(b.status) > 0
+	status = "DOWN"
+	if upAfter {
+		status = "UP"
+	}
+
+	// No Status Change
+	if upBefore == upAfter {
+		// We're still with the same status, no need to propagate
+		log.Ctx(ctx).Debug().Msgf("Still %s, no need to propagate", status)
+		return
+	}
+
+	// Status Change
+	log.Ctx(ctx).Debug().Msgf("Propagating new %s status", status)
+	for _, fn := range b.updaters {
+		fn(upAfter)
+	}
+}
+
+func (b *WRRLoadBalancer) RegisterStatusUpdater(fn func(up bool)) error {
+	if !b.wantsHealthCheck {
+		return errors.New("healthCheck not enabled in config for this weighted service")
+	}
+
+	b.updaters = append(b.updaters, fn)
+	return nil
+}
+
+func (b *WRRLoadBalancer) nextServer() (Handler, error) {
+	b.serversMu.Lock()
+	defer b.serversMu.Unlock()
+
+	if len(b.servers) == 0 || len(b.status) == 0 {
+		return nil, errNoServersInPool
+	}
+
+	// The algo below may look messy, but is actually very simple:
+	// it calculates the GCD and subtracts it on every iteration, which interleaves servers
+	// and allows us not to build an iterator every time we readjust weights.
+
+	// Maximum weight across all enabled servers.
+	maximum := b.maxWeight()
+	if maximum == 0 {
+		return nil, errors.New("all servers have 0 weight")
+	}
+
+	// GCD across all enabled servers
+	gcd := b.weightGcd()
+
+	for {
+		b.index = (b.index + 1) % len(b.servers)
+		if b.index == 0 {
+			b.currentWeight -= gcd
+			if b.currentWeight <= 0 {
+				b.currentWeight = maximum
+			}
+		}
+		srv := b.servers[b.index]
+
+		if _, ok := b.status[srv.name]; ok && srv.weight >= b.currentWeight {
+			return srv, nil
+		}
+	}
+}
 
 func (b *WRRLoadBalancer) maxWeight() int {
@@ -92,36 +186,3 @@ func gcd(a, b int) int {
 	}
 	return a
 }
-
-func (b *WRRLoadBalancer) next() (Handler, error) {
-	if len(b.servers) == 0 {
-		return nil, errNoServersInPool
-	}
-
-	// The algo below may look messy, but is actually very simple
-	// it calculates the GCD and subtracts it on every iteration, what interleaves servers
-	// and allows us not to build an iterator every time we readjust weights
-
-	// Maximum weight across all enabled servers
-	maximum := b.maxWeight()
-	if maximum == 0 {
-		return nil, errors.New("all servers have 0 weight")
-	}
-
-	// GCD across all enabled servers
-	gcd := b.weightGcd()
-
-	for {
-		b.index = (b.index + 1) % len(b.servers)
-		if b.index == 0 {
-			b.currentWeight -= gcd
-			if b.currentWeight <= 0 {
-				b.currentWeight = maximum
-			}
-		}
-		srv := b.servers[b.index]
-		if srv.weight >= b.currentWeight {
-			return srv, nil
-		}
-	}
-}
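nextServer keeps the GCD-based rotation of the removed next(), adding the status check so unhealthy servers are skipped. The interleaving it produces is easiest to see in a standalone trace with hard-coded weights (illustrative only, not the commit's type): with A at weight 3 and B at weight 1, the maximum is 3, the GCD is 1, and the cycle produced is A A A B, repeating.

package main

import "fmt"

// Standalone trace of the GCD-based weighted round-robin used in nextServer.
func main() {
	type srv struct {
		name   string
		weight int
	}
	servers := []srv{{"A", 3}, {"B", 1}}
	maximum, gcd := 3, 1 // max weight and GCD of the weights {3, 1}

	index, currentWeight := -1, 0
	for range 8 {
		for {
			index = (index + 1) % len(servers)
			if index == 0 {
				currentWeight -= gcd
				if currentWeight <= 0 {
					currentWeight = maximum
				}
			}
			if servers[index].weight >= currentWeight {
				fmt.Print(servers[index].name, " ")
				break
			}
		}
	}
	fmt.Println() // Output: A A A B A A A B
}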
@@ -9,50 +9,7 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-type fakeConn struct {
-	writeCall map[string]int
-	closeCall int
-}
-
-func (f *fakeConn) Read(b []byte) (n int, err error) {
-	panic("implement me")
-}
-
-func (f *fakeConn) Write(b []byte) (n int, err error) {
-	f.writeCall[string(b)]++
-	return len(b), nil
-}
-
-func (f *fakeConn) Close() error {
-	f.closeCall++
-	return nil
-}
-
-func (f *fakeConn) LocalAddr() net.Addr {
-	panic("implement me")
-}
-
-func (f *fakeConn) RemoteAddr() net.Addr {
-	panic("implement me")
-}
-
-func (f *fakeConn) SetDeadline(t time.Time) error {
-	panic("implement me")
-}
-
-func (f *fakeConn) SetReadDeadline(t time.Time) error {
-	panic("implement me")
-}
-
-func (f *fakeConn) SetWriteDeadline(t time.Time) error {
-	panic("implement me")
-}
-
-func (f *fakeConn) CloseWrite() error {
-	panic("implement me")
-}
-
-func TestLoadBalancing(t *testing.T) {
+func TestWRRLoadBalancer_LoadBalancing(t *testing.T) {
 	testCases := []struct {
 		desc          string
 		serversWeight map[string]int
@@ -124,9 +81,9 @@ func TestLoadBalancing(t *testing.T) {
 		t.Run(test.desc, func(t *testing.T) {
 			t.Parallel()
 
-			balancer := NewWRRLoadBalancer()
+			balancer := NewWRRLoadBalancer(false)
 			for server, weight := range test.serversWeight {
-				balancer.AddWeightServer(HandlerFunc(func(conn WriteCloser) {
+				balancer.Add(server, HandlerFunc(func(conn WriteCloser) {
 					_, err := conn.Write([]byte(server))
 					require.NoError(t, err)
 				}), &weight)
@@ -142,3 +99,196 @@ func TestLoadBalancing(t *testing.T) {
 		})
 	}
 }
+
+func TestWRRLoadBalancer_NoServiceUp(t *testing.T) {
+	balancer := NewWRRLoadBalancer(false)
+
+	balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("first"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("second"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.SetStatus(t.Context(), "first", false)
+	balancer.SetStatus(t.Context(), "second", false)
+
+	conn := &fakeConn{writeCall: make(map[string]int)}
+	balancer.ServeTCP(conn)
+
+	assert.Empty(t, conn.writeCall)
+	assert.Equal(t, 1, conn.closeCall)
+}
+
+func TestWRRLoadBalancer_OneServerDown(t *testing.T) {
+	balancer := NewWRRLoadBalancer(false)
+
+	balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("first"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("second"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.SetStatus(t.Context(), "second", false)
+
+	conn := &fakeConn{writeCall: make(map[string]int)}
+	for range 3 {
+		balancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 3, conn.writeCall["first"])
+}
+
+func TestWRRLoadBalancer_DownThenUp(t *testing.T) {
+	balancer := NewWRRLoadBalancer(false)
+
+	balancer.Add("first", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("first"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.Add("second", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("second"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer.SetStatus(t.Context(), "second", false)
+
+	conn := &fakeConn{writeCall: make(map[string]int)}
+	for range 3 {
+		balancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 3, conn.writeCall["first"])
+
+	balancer.SetStatus(t.Context(), "second", true)
+
+	conn = &fakeConn{writeCall: make(map[string]int)}
+	for range 2 {
+		balancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 1, conn.writeCall["first"])
+	assert.Equal(t, 1, conn.writeCall["second"])
+}
+
+func TestWRRLoadBalancer_Propagate(t *testing.T) {
+	balancer1 := NewWRRLoadBalancer(true)
+
+	balancer1.Add("first", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("first"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer1.Add("second", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("second"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer2 := NewWRRLoadBalancer(true)
+
+	balancer2.Add("third", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("third"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	balancer2.Add("fourth", HandlerFunc(func(conn WriteCloser) {
+		_, err := conn.Write([]byte("fourth"))
+		require.NoError(t, err)
+	}), pointer(1))
+
+	topBalancer := NewWRRLoadBalancer(true)
+
+	topBalancer.Add("balancer1", balancer1, pointer(1))
+	_ = balancer1.RegisterStatusUpdater(func(up bool) {
+		topBalancer.SetStatus(t.Context(), "balancer1", up)
+	})
+
+	topBalancer.Add("balancer2", balancer2, pointer(1))
+	_ = balancer2.RegisterStatusUpdater(func(up bool) {
+		topBalancer.SetStatus(t.Context(), "balancer2", up)
+	})
+
+	conn := &fakeConn{writeCall: make(map[string]int)}
+	for range 8 {
+		topBalancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 2, conn.writeCall["first"])
+	assert.Equal(t, 2, conn.writeCall["second"])
+	assert.Equal(t, 2, conn.writeCall["third"])
+	assert.Equal(t, 2, conn.writeCall["fourth"])
+
+	// fourth gets downed, but balancer2 still up since third is still up.
+	balancer2.SetStatus(t.Context(), "fourth", false)
+
+	conn = &fakeConn{writeCall: make(map[string]int)}
+	for range 8 {
+		topBalancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 2, conn.writeCall["first"])
+	assert.Equal(t, 2, conn.writeCall["second"])
+	assert.Equal(t, 4, conn.writeCall["third"])
+	assert.Equal(t, 0, conn.writeCall["fourth"])
+
+	// third gets downed, and the propagation triggers balancer2 to be marked as
+	// down as well for topBalancer.
+	balancer2.SetStatus(t.Context(), "third", false)
+
+	conn = &fakeConn{writeCall: make(map[string]int)}
+	for range 8 {
+		topBalancer.ServeTCP(conn)
+	}
+	assert.Equal(t, 4, conn.writeCall["first"])
+	assert.Equal(t, 4, conn.writeCall["second"])
+	assert.Equal(t, 0, conn.writeCall["third"])
+	assert.Equal(t, 0, conn.writeCall["fourth"])
+}
+
+func pointer[T any](v T) *T { return &v }
+
+type fakeConn struct {
+	writeCall map[string]int
+	closeCall int
+}
+
+func (f *fakeConn) Read(b []byte) (n int, err error) {
+	panic("implement me")
+}
+
+func (f *fakeConn) Write(b []byte) (n int, err error) {
+	f.writeCall[string(b)]++
+	return len(b), nil
+}
+
+func (f *fakeConn) Close() error {
+	f.closeCall++
+	return nil
+}
+
+func (f *fakeConn) LocalAddr() net.Addr {
+	panic("implement me")
+}
+
+func (f *fakeConn) RemoteAddr() net.Addr {
+	panic("implement me")
+}
+
+func (f *fakeConn) SetDeadline(t time.Time) error {
+	panic("implement me")
+}
+
+func (f *fakeConn) SetReadDeadline(t time.Time) error {
+	panic("implement me")
+}
+
+func (f *fakeConn) SetWriteDeadline(t time.Time) error {
+	panic("implement me")
+}
+
+func (f *fakeConn) CloseWrite() error {
+	panic("implement me")
+}
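The Propagate test above shows how nested balancers are wired through RegisterStatusUpdater. What would call SetStatus in production is a health checker; as a hedged sketch (the watch helper, the statusSetter interface, and the logSetter stub are hypothetical, not part of this commit), a periodic TCP probe could drive it like this:

package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

// statusSetter matches the SetStatus method added to WRRLoadBalancer.
type statusSetter interface {
	SetStatus(ctx context.Context, childName string, up bool)
}

// watch probes addr on every tick and reports the result. Because SetStatus
// only propagates on UP<->DOWN transitions, repeated identical reports are cheap.
func watch(ctx context.Context, lb statusSetter, name, addr string, interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			d := net.Dialer{Timeout: interval / 2}
			conn, err := d.DialContext(ctx, "tcp", addr)
			if err == nil {
				conn.Close()
			}
			lb.SetStatus(ctx, name, err == nil)
		}
	}
}

// logSetter is a stand-in for the balancer, for demonstration.
type logSetter struct{}

func (logSetter) SetStatus(_ context.Context, name string, up bool) {
	fmt.Println(name, "up:", up)
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	watch(ctx, logSetter{}, "backend1", "127.0.0.1:8080", 2*time.Second)
}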