
fix: don't spam nodes with updates and instead pull the registry on keepalive

Arthur K. 2026-01-22 16:02:44 +03:00
parent 7fb90dd1da
commit 476c4b056f
Signed by: wzray
GPG key ID: B97F30FDC4636357
5 changed files with 108 additions and 40 deletions
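
In short: instead of the master pushing join/leave updates to every node, the keepalive response now carries the master's full node registry and the sender overwrites its local view with it. A rough sketch of that pull model, using simplified stand-ins for the real `types.Node`, `state.Registry`, and `client.Post`:

```go
package main

import "fmt"

// Node is a simplified stand-in for types.Node.
type Node struct {
	Hostname string
	Endpoint string
}

// Registry is a simplified stand-in for the runtime registry; Set replaces
// the whole membership view instead of applying incremental join/leave events.
type Registry struct {
	nodes map[string]Node
}

func (r *Registry) Set(nodes map[string]Node) { r.nodes = nodes }

// keepalive models the pull-based exchange: the master answers a keepalive
// with its full registry and the node overwrites its local copy.
func keepalive(self Node, master func(Node) map[string]Node, reg *Registry) {
	reg.Set(master(self))
}

func main() {
	reg := &Registry{}
	// hypothetical master that already knows about one other node
	master := func(self Node) map[string]Node {
		return map[string]Node{
			self.Hostname: self,
			"dns-1":       {Hostname: "dns-1", Endpoint: "10.0.0.2:8080"},
		}
	}
	keepalive(Node{Hostname: "host-1", Endpoint: "10.0.0.1:8080"}, master, reg)
	fmt.Println(len(reg.nodes), "nodes known after keepalive")
}
```

Judging from the diff, the new `/master/event_*` endpoints let masters relay events to each other with further notifications suppressed, while regular nodes simply consume the registry returned by the keepalive.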

TODO.md (53 changes)

@@ -1,3 +1,56 @@
# Background
Some background first:
A node can have multiple roles.
These include (but are not limited to):
* Host (can generate events)
* DNS (can consume the events and act on them)
* Something else that I might come up with (the architecture has to be expandable; see the sketch after this list)
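
A rough sketch of how the multi-role idea could be modeled; the names below are illustrative, not the repo's actual types:

```go
package main

import "fmt"

// RoleKind is an illustrative enumeration of the roles a node can perform.
type RoleKind string

const (
	RoleHost RoleKind = "host" // generates events
	RoleDNS  RoleKind = "dns"  // consumes events and acts on them
)

// Node carries a set of roles, so one machine can be both a host and a DNS
// processor, and new role kinds can be added without changing the type.
type Node struct {
	Hostname string
	Roles    []RoleKind
}

func main() {
	n := Node{Hostname: "host-1", Roles: []RoleKind{RoleHost, RoleDNS}}
	fmt.Printf("%s performs %v\n", n.Hostname, n.Roles)
}
```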
# Control plane (3+ nodes)
* Quorum
  * Consists of $\lfloor n/2 \rfloor + 1$ nodes (see the sketch after this list)
  * Cluster is considered "degraded" if no quorum can be formed
* Stores an event log
  * **Only** the leader can append to the log (with quorum permission)
* Membership authority
  * No joins without quorum approval
  * Leaves are not propagated without quorum
* Manages the epoch (useful for GC)
  * Node $N$ with $N.\mathrm{epoch} \neq \mathrm{cluster.epoch}$ can **not** join the cluster and has to re-join (bootstrap)
* Can (but doesn't have to) be a bootstrap point
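
A minimal sketch of the quorum-size and epoch rules above (illustrative helper functions, not code from the repo):

```go
package main

import "fmt"

// quorumSize returns the number of control-plane nodes needed for quorum:
// floor(n/2) + 1, so 3 nodes tolerate 1 failure, 5 tolerate 2, and so on.
func quorumSize(n int) int {
	return n/2 + 1
}

// canJoin models the epoch rule: a node whose epoch differs from the
// cluster epoch must re-bootstrap instead of joining directly.
func canJoin(nodeEpoch, clusterEpoch uint64) bool {
	return nodeEpoch == clusterEpoch
}

func main() {
	fmt.Println(quorumSize(3), quorumSize(5)) // 2 3
	fmt.Println(canJoin(7, 7), canJoin(6, 7)) // true false
}
```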
# Membership
* Membership is managed through SWIM
* Each node contains only a small slice of the entire network
## Joining
Each node has an array of roles:
1. That it performs
2. That it requires to operate (can be moved out to the master, or the shared type)
3. That it needs for bootstrapping (analogous to 2.)
A node can join via a master or via other nodes.
When a node requests to join, the responder asks the CP for permission to add this node (see the sketch after this list):
* If the master allows:
  1. The node gets a membership digest from the CP.
  2. The node *can* be brought up to speed using its neighbors from 1.
  3. The node-join event gets broadcast over SWIM gossiping.
* Otherwise, nothing happens.
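
Roughly, the join handshake could look like the following; `askCP`, `Digest`, and `gossip` are illustrative stand-ins, not the actual API:

```go
package main

import (
	"errors"
	"fmt"
)

type Node struct {
	Hostname string
	Roles    []string // roles it performs
	Requires []string // roles it needs to operate
}

// Digest is a hypothetical membership digest returned by the control plane.
type Digest map[string]Node

// handleJoin is what a responder might do when a new node asks to join:
// ask the control plane (CP) for permission, hand back the digest, and let
// SWIM gossip propagate the join event.
func handleJoin(n Node, askCP func(Node) (Digest, error), gossip func(string, Node)) (Digest, error) {
	digest, err := askCP(n)
	if err != nil {
		// CP (or quorum) rejected the join; nothing happens.
		return nil, err
	}
	gossip("node_join", n)
	return digest, nil
}

func main() {
	askCP := func(n Node) (Digest, error) {
		if n.Hostname == "" {
			return nil, errors.New("rejected")
		}
		return Digest{n.Hostname: n}, nil
	}
	gossip := func(event string, n Node) { fmt.Println("gossiping", event, "for", n.Hostname) }

	d, err := handleJoin(Node{Hostname: "host-2", Roles: []string{"host"}}, askCP, gossip)
	fmt.Println(len(d), err)
}
```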
# Host node
## Bootstrap
On join, the host node requests the `dns` nodes (and other node types, such as `ns`, `nginx`, etc.). They should really be called something like `dns_processor`, and the internals (how the DNS is processed) should not be visible to the cluster, but that's a task for future me.
When a new update occurs, it sends the update to *some* `dns` hosts, as sketched below.
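
A small sketch of that fan-out, assuming a hypothetical `fanOut` helper and simplified types:

```go
package main

import "fmt"

type Node struct {
	Hostname string
	Endpoint string
}

// Update is an illustrative host-generated event with a sequence number,
// matching the map[hostName]seq bookkeeping the dns nodes keep.
type Update struct {
	Host string
	Seq  uint64
	Data string
}

// fanOut sends an update to at most k of the known dns nodes rather than
// broadcasting it to everyone.
func fanOut(u Update, dnsNodes []Node, k int, send func(Node, Update) error) {
	for i, n := range dnsNodes {
		if i >= k {
			break
		}
		if err := send(n, u); err != nil {
			fmt.Println("failed to notify", n.Hostname, ":", err)
		}
	}
}

func main() {
	dns := []Node{{Hostname: "dns-1"}, {Hostname: "dns-2"}, {Hostname: "dns-3"}}
	send := func(n Node, u Update) error {
		fmt.Printf("sent seq=%d from %s to %s\n", u.Seq, u.Host, n.Hostname)
		return nil
	}
	fanOut(Update{Host: "host-1", Seq: 42, Data: "A 10.0.0.5"}, dns, 2, send)
}
```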
# DNS node
## Bootstrap
First, it gets all the available `host` nodes from the CP.
Then it requests their configs and sets `map[hostName]seq` accordingly, as sketched below.
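
A minimal sketch of that bootstrap, with `listHosts` and `getConfig` as hypothetical stand-ins for the CP and host-config calls:

```go
package main

import "fmt"

// HostConfig is an illustrative per-host config carrying its latest sequence number.
type HostConfig struct {
	Records []string
	Seq     uint64
}

// bootstrap models the dns-node bootstrap described above: fetch the list of
// host nodes from the control plane, pull each config, and remember the
// last-seen sequence number per host.
func bootstrap(listHosts func() []string, getConfig func(string) HostConfig) map[string]uint64 {
	seq := make(map[string]uint64)
	for _, h := range listHosts() {
		seq[h] = getConfig(h).Seq
	}
	return seq
}

func main() {
	listHosts := func() []string { return []string{"host-1", "host-2"} }
	getConfig := func(h string) HostConfig {
		return HostConfig{Records: []string{h + ".lan A 10.0.0.1"}, Seq: 3}
	}
	fmt.Println(bootstrap(listHosts, getConfig)) // map[host-1:3 host-2:3]
}
```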
## Simple join (when other nodes exist)
It requests its config from the other nodes, and that's it.
<!-- TODO: finish the TODO file lol -->
# Minor To-Do
- auth middleware lol
- move request logging out of the request handling into a middleware
- nginx role

go.mod (6 changes)

@@ -2,10 +2,12 @@ module git.wzray.com/homelab/hivemind

 go 1.25.5

-require github.com/rs/zerolog v1.34.0
+require (
+    github.com/rs/zerolog v1.34.0
+    github.com/BurntSushi/toml v1.6.0
+)

 require (
-    github.com/BurntSushi/toml v1.6.0 // indirect
     github.com/mattn/go-colorable v0.1.14 // indirect
     github.com/mattn/go-isatty v0.0.20 // indirect
     github.com/pkg/errors v0.9.1 // indirect


@@ -9,6 +9,7 @@ import (
     "git.wzray.com/homelab/hivemind/internal/state"
     "git.wzray.com/homelab/hivemind/internal/types"
     "git.wzray.com/homelab/hivemind/internal/web/client"
+    "github.com/rs/zerolog/log"
 )

 type Role struct {
@@ -36,7 +37,7 @@ func New(state *state.RuntimeState, config config.MasterConfig) *Role {
 func (r *Role) OnStartup(ctx context.Context) error {
     r.tasksGroup.Go(func() {
         r.observer.Start(ctx, func(n types.Node) error {
-            _, err := r.onLeave(n)
+            _, err := r.onLeave(n, true)
             return err
         })
     })
@@ -50,7 +51,7 @@ func (r *Role) OnStartup(ctx context.Context) error {
 }

 func (r *Role) notify(path types.Path, v any) {
-    for _, n := range r.state.Registry.Nodes() {
+    for _, n := range r.state.Registry.ByRole(types.MasterRole) {
         addr := n.Endpoint
         r.tasksGroup.Go(func() {
             client.Post[any](addr, path, v)
@@ -58,39 +59,58 @@ func (r *Role) notify(path types.Path, v any) {
     }
 }

-func (r *Role) onJoin(node types.Node) (map[string]types.Node, error) {
+func (r *Role) onJoin(node types.Node, notify bool) (map[string]types.Node, error) {
     if err := r.state.Registry.AddNode(node); err != nil {
         return nil, err
     }
-    r.notify(types.PathNodeJoin, node)
+    if notify {
+        r.notify(types.PathMasterEventJoin, node)
+    }
     return r.state.Registry.AllNodes(), nil
 }

-func (r *Role) onLeave(node types.Node) (bool, error) {
+func (r *Role) onLeave(node types.Node, notify bool) (bool, error) {
     if err := r.state.Registry.RemoveNode(node); err != nil {
         return false, err
     }
-    r.notify(types.PathNodeLeave, node)
+    if notify {
+        r.notify(types.PathMasterEventLeave, node)
+    }
     return true, nil
 }

-func (r *Role) onKeepAlive(node types.Node) (bool, error) {
+func (r *Role) onKeepAlive(node types.Node, notify bool) (map[string]types.Node, error) {
     r.observer.onKeepAlive(node)
     if ok := r.state.Registry.Exists(node.Hostname); !ok {
-        _, err := r.onJoin(node)
-        return true, err
+        // TODO: i don't like this side effect
+        if _, err := r.onJoin(node, true); err != nil {
+            log.Warn().Err(err).Msg("unable to add node to the registry from keepalive")
+        }
     }
-    return false, nil
+    if notify {
+        r.notify(types.PathMasterEventKeepalive, node)
+    }
+    return r.state.Registry.AllNodes(), nil
+}
+
+func eventFunc[R any](fn func(types.Node, bool) (R, error), notify bool) func(types.Node) (R, error) {
+    return func(n types.Node) (R, error) {
+        return fn(n, notify)
+    }
 }

 func (c *Role) RegisterHandlers(r types.Registrator) {
-    r.Register(types.PostEndpoint(types.PathMasterJoin, c.onJoin))
-    r.Register(types.PostEndpoint(types.PathMasterLeave, c.onLeave))
-    r.Register(types.PostEndpoint(types.PathMasterKeepalive, c.onKeepAlive))
+    r.Register(types.PostEndpoint(types.PathMasterKeepalive, eventFunc(c.onKeepAlive, true)))
+    r.Register(types.PostEndpoint(types.PathMasterEventKeepalive, eventFunc(c.onKeepAlive, false)))
+    r.Register(types.PostEndpoint(types.PathMasterJoin, eventFunc(c.onJoin, true)))
+    r.Register(types.PostEndpoint(types.PathMasterLeave, eventFunc(c.onLeave, true)))
+    r.Register(types.PostEndpoint(types.PathMasterEventJoin, eventFunc(c.onJoin, false)))
+    r.Register(types.PostEndpoint(types.PathMasterEventLeave, eventFunc(c.onLeave, false)))
 }


@@ -92,7 +92,6 @@ func (r *Role) Leave() error {
 func (r *Role) OnStartup(ctx context.Context) error {
     r.keepaliveGroup.Go(r.keepaliveFunc(ctx))
     return nil
 }
@@ -107,11 +106,20 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() {
             logger := log.With().Str("name", m.Hostname).Logger()
             logger.Debug().Msg("sending keepalive packet")
-            if _, err := client.Post[any](m.Endpoint, types.PathMasterKeepalive, r.state.Self); err != nil {
+            nodes, err := client.Post[map[string]types.Node](m.Endpoint, types.PathMasterKeepalive, r.state.Self)
+            if err != nil {
                 logger.Info().Err(err).Msg("unable to send keepalive packet")
-            } else {
-                logger.Debug().Msg("keepalive packet sent")
+                continue
             }
+
+            logger.Debug().Msg("keepalive packet sent")
+            if err := r.state.Registry.Set(*nodes); err != nil {
+                logger.Warn().Err(err).Msg("unable to set masters nodes")
+                continue
+            }
+            break
         }
     }
@@ -127,26 +135,10 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() {
     }
 }

-func (r *Role) onJoin(node types.Node) (bool, error) {
-    if err := r.state.Registry.AddNode(node); err != nil {
-        return false, err
-    }
-    return true, nil
-}
-
-func (r *Role) onLeave(node types.Node) (bool, error) {
-    if err := r.state.Registry.RemoveNode(node); err != nil {
-        return false, err
-    }
-    return true, nil
-}
-
 func healthcheck() (string, error) {
     return "OK", nil
 }

 func (n *Role) RegisterHandlers(r types.Registrator) {
     r.Register(types.GetEndpoint(types.PathNodeHealthcheck, healthcheck))
-    r.Register(types.PostEndpoint(types.PathNodeJoin, n.onJoin))
-    r.Register(types.PostEndpoint(types.PathNodeLeave, n.onLeave))
 }


@@ -15,13 +15,14 @@ func (p Path) String() string {
 }

 const (
     PathMasterJoin           Path = "/master/join"
     PathMasterLeave          Path = "/master/leave"
     PathMasterKeepalive      Path = "/master/keepalive"
+    PathMasterEventJoin      Path = "/master/event_join"
+    PathMasterEventLeave     Path = "/master/event_leave"
+    PathMasterEventKeepalive Path = "/master/event_keepalive"

     PathNodeHealthcheck Path = "/node/healthcheck"
-    PathNodeJoin        Path = "/node/join"
-    PathNodeLeave       Path = "/node/leave"

     PathDnsCallback Path = "/dns/callback"