feat: initial release
This commit is contained in:
parent
a3cf21f5bd
commit
761174d035
41 changed files with 2008 additions and 217 deletions
93
internal/roles/master/master.go
Normal file
93
internal/roles/master/master.go
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
package master
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"git.wzray.com/homelab/mastermind/internal/config"
|
||||
"git.wzray.com/homelab/mastermind/internal/roles"
|
||||
"git.wzray.com/homelab/mastermind/internal/state"
|
||||
"git.wzray.com/homelab/mastermind/internal/types"
|
||||
"git.wzray.com/homelab/mastermind/internal/web/client"
|
||||
)
|
||||
|
||||
type Role struct {
|
||||
state *state.RuntimeState
|
||||
config config.MasterConfig
|
||||
tasksGroup sync.WaitGroup
|
||||
observer *observer
|
||||
roles.BaseRole
|
||||
}
|
||||
|
||||
func New(state *state.RuntimeState, config config.MasterConfig) *Role {
|
||||
return &Role{
|
||||
state: state,
|
||||
config: config,
|
||||
observer: newObserver(
|
||||
state,
|
||||
config.ObserverInterval,
|
||||
config.BackoffSeconds,
|
||||
config.BackoffCount,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Role) OnStartup(ctx context.Context) error {
|
||||
r.tasksGroup.Go(func() {
|
||||
r.observer.Start(ctx, func(n types.Node) error {
|
||||
_, err := r.onLeave(n)
|
||||
return err
|
||||
})
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Role) OnShutdown() error {
|
||||
r.tasksGroup.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Role) notify(path types.Path, v any) {
|
||||
for _, n := range r.state.Registry.Nodes() {
|
||||
addr := n.Address
|
||||
r.tasksGroup.Go(func() {
|
||||
client.Post[any](addr, path, v)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Role) onJoin(node types.Node) ([]types.Node, error) {
|
||||
if err := r.state.Registry.AddNode(node); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r.notify(types.PathNodeJoin, node)
|
||||
|
||||
return r.state.Registry.AllNodes(), nil
|
||||
}
|
||||
|
||||
func (r *Role) onLeave(node types.Node) (bool, error) {
|
||||
if err := r.state.Registry.RemoveNode(node.Name); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
r.notify(types.PathNodeLeave, node.Name)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *Role) onKeepAlive(node types.Node) (bool, error) {
|
||||
if ok := r.state.Registry.Exists(node.Name); !ok {
|
||||
_, err := r.onJoin(node)
|
||||
return true, err
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (c *Role) RegisterHandlers(r types.Registrator) {
|
||||
r.Register(types.PostEndpoint(types.PathMasterJoin, c.onJoin))
|
||||
r.Register(types.PostEndpoint(types.PathMasterLeave, c.onLeave))
|
||||
r.Register(types.PostEndpoint(types.PathMasterKeepalive, c.onKeepAlive))
|
||||
}
|
||||
83
internal/roles/master/observer.go
Normal file
83
internal/roles/master/observer.go
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
package master
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"git.wzray.com/homelab/mastermind/internal/state"
|
||||
"git.wzray.com/homelab/mastermind/internal/types"
|
||||
"git.wzray.com/homelab/mastermind/internal/web/client"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type observer struct {
|
||||
state *state.RuntimeState
|
||||
interval int
|
||||
backoff int
|
||||
backoffCount int
|
||||
}
|
||||
|
||||
func newObserver(
|
||||
state *state.RuntimeState,
|
||||
interval int,
|
||||
backoff int,
|
||||
backoffCount int,
|
||||
) *observer {
|
||||
return &observer{
|
||||
state: state,
|
||||
interval: interval,
|
||||
backoff: backoff,
|
||||
backoffCount: backoffCount,
|
||||
}
|
||||
}
|
||||
|
||||
func (o *observer) pollNodes(ctx context.Context, onLeave func(types.Node) error) {
|
||||
for _, n := range o.state.Registry.Nodes() {
|
||||
name := n.Name
|
||||
logger := log.With().Str("name", name).Logger()
|
||||
logger.Debug().Msg("checking node")
|
||||
|
||||
delay := time.Duration(o.backoff)
|
||||
alive := false
|
||||
for i := o.backoffCount; i > 0; i-- {
|
||||
_, err := client.Get[any](n.Address, types.PathNodeHealthcheck)
|
||||
|
||||
if err == nil {
|
||||
logger.Debug().Msg("node is alive")
|
||||
alive = true
|
||||
break
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
logger.Info().Any("delay", delay).Msg("node didn't respond, sleeping")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
goto dead
|
||||
case <-time.After(delay * time.Second):
|
||||
delay *= 2
|
||||
}
|
||||
}
|
||||
|
||||
dead:
|
||||
if !alive {
|
||||
logger.Info().Msg("node is dead, removing")
|
||||
if err := onLeave(n); err != nil {
|
||||
logger.Warn().Err(err).Msg("onLeave call failed")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *observer) Start(ctx context.Context, onLeave func(types.Node) error) {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Duration(o.interval) * time.Second):
|
||||
o.pollNodes(ctx, onLeave)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue