package master

import (
	"context"
	"time"

	"git.wzray.com/homelab/hivemind/internal/state"
	"git.wzray.com/homelab/hivemind/internal/types"
	"git.wzray.com/homelab/hivemind/internal/web/client"
	"github.com/rs/zerolog/log"
)

// observer periodically healthchecks every node in the registry and reports
// the ones that stop responding.
type observer struct {
	state *state.RuntimeState
	// interval is the pause between two polling rounds, in seconds.
	interval int
	// backoff is the initial wait between two failed healthchecks of the same
	// node, in seconds. It doubles after every failed attempt.
	backoff int
	// backoffCount is the number of healthcheck attempts made before a node is
	// considered dead.
	backoffCount int
}

// newObserver builds an observer over the given runtime state. interval and
// backoff are expressed in seconds; backoffCount is the number of healthcheck
// attempts per node.
func newObserver(
	state *state.RuntimeState,
	interval int,
	backoff int,
	backoffCount int,
) *observer {
	return &observer{
		state:        state,
		interval:     interval,
		backoff:      backoff,
		backoffCount: backoffCount,
	}
}

// pollNodes healthchecks every node currently in the registry. A node that
// fails all of its attempts is reported through onLeave; an error returned by
// onLeave is logged and does not stop the round.
//
// Between failed attempts the observer waits with a doubling delay. If ctx is
// cancelled the round is aborted immediately and the node being checked is NOT
// reported as dead: cancellation says nothing about the node's health.
func (o *observer) pollNodes(ctx context.Context, onLeave func(types.Node) error) {
	// Always probe at least once, so a zero or negative backoffCount cannot
	// cause every node to be removed without a single healthcheck.
	attempts := o.backoffCount
	if attempts < 1 {
		attempts = 1
	}

	for _, n := range o.state.Registry.Nodes() {
		if ctx.Err() != nil {
			return
		}

		logger := log.With().Str("name", n.Hostname).Logger()
		logger.Debug().Msg("checking node")

		delay := time.Duration(o.backoff) * time.Second
		alive := false

		for attempt := 1; attempt <= attempts; attempt++ {
			if _, err := client.Get[any](n.Endpoint, types.PathNodeHealthcheck); err == nil {
				logger.Debug().Msg("node is alive")
				alive = true
				break
			}

			// No point in sleeping after the final attempt: the verdict is
			// already known.
			if attempt == attempts {
				break
			}

			logger.Info().Dur("delay", delay).Msg("node didn't respond, sleeping")

			timer := time.NewTimer(delay)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
				delay *= 2
			}
		}

		if alive {
			continue
		}

		logger.Info().Msg("node is dead, removing")
		if err := onLeave(n); err != nil {
			logger.Warn().Err(err).Msg("onLeave call failed")
		}
	}
}

// Start runs the polling loop until ctx is cancelled. It blocks the calling
// goroutine. Each round begins interval seconds after the previous one
// finished, and onLeave is invoked for every node found dead during a round.
func (o *observer) Start(ctx context.Context, onLeave func(types.Node) error) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(time.Duration(o.interval) * time.Second):
			o.pollNodes(ctx, onLeave)
		}
	}
}