fix: don't spam nodes with updates and instead pull the registry on keepalive
This commit is contained in:
parent
7fb90dd1da
commit
476c4b056f
5 changed files with 108 additions and 40 deletions
53
TODO.md
53
TODO.md
|
|
@ -1,3 +1,56 @@
|
||||||
|
# Background
|
||||||
|
Some background first:
|
||||||
|
the node can have multiple roles
|
||||||
|
this includes (but is not limited to)
|
||||||
|
* Host (can generate events)
|
||||||
|
* DNS (can consume the events and act on them)
|
||||||
|
* Something else that I might come up with (the architecture has to be expandable)
|
||||||
|
|
||||||
|
# Control plane (3+ nodes)
|
||||||
|
* Quorum
|
||||||
|
* Consists of $\lfloor n / 2 \rfloor + 1$ nodes
|
||||||
|
* Cluster is considered "degraded" if no quorum can be created
|
||||||
|
* Stores an event log
|
||||||
|
* **Only** leader can append to the log (with quorum permission)
|
||||||
|
* Membership authority
|
||||||
|
* No joins without quorum approval
|
||||||
|
* Leaves are not propagated without quorum
|
||||||
|
* Manages epoch (useful for GC)
|
||||||
|
* Node $N$ with $N.epoch \neq cluster.epoch$ can **not** join the cluster, and has to re-join (bootstrap)
|
||||||
|
* Can (but doesn't have to) be a bootstrap point
|
||||||
|
|
||||||
|
# Membership
|
||||||
|
* Membership is managed through SWIM
|
||||||
|
* Each node contains a small slice of the entire network
|
||||||
|
## Joining
|
||||||
|
Each node has an array of roles:
|
||||||
|
1. That it performs
|
||||||
|
2. That it requires to operate (can be moved out to the master, or the shared type)
|
||||||
|
3. That it needs for bootstrapping (analogous to 2.)
|
||||||
|
|
||||||
|
Node can join via a master or via other nodes
|
||||||
|
When a node requests to join, the responder makes a request to the CP and asks for permission to add this node
|
||||||
|
* If master allows
|
||||||
|
1. The node gets a membership digest from the CP.
|
||||||
|
2. The node *can* be brought up to speed using its neighbors from 1.
|
||||||
|
3. Node join event gets broadcast over SWIM gossip
|
||||||
|
* Otherwise, nothing happens
|
||||||
|
|
||||||
|
# Host node
|
||||||
|
## Bootstrap
|
||||||
|
Host node requests `dns` nodes on join (and other node types, such as `ns`, `nginx`, etc.). They should really be called something like `dns_processor`, and the internals (how it processes the DNS) should not be visible to the cluster, but that's a task for a future me.
|
||||||
|
When a new update occurs, it sends the update to *some* `dns` hosts.
|
||||||
|
|
||||||
|
# DNS node
|
||||||
|
## Bootstrap
|
||||||
|
First, it gets all the available `hosts` from the CP
|
||||||
|
Then it requests their configs and sets map[hostName]seq accordingly
|
||||||
|
## Simple join (when other nodes exist)
|
||||||
|
It requests its config from other nodes and that's it
|
||||||
|
|
||||||
|
<!-- TODO: finish the TODO file lol -->
|
||||||
|
|
||||||
|
# Minor To-Do
|
||||||
- auth middleware lol
|
- auth middleware lol
|
||||||
- move request logging out of the request handling into a middleware
|
- move request logging out of the request handling into a middleware
|
||||||
- nginx role
|
- nginx role
|
||||||
|
|
|
||||||
6
go.mod
6
go.mod
|
|
@ -2,10 +2,12 @@ module git.wzray.com/homelab/hivemind
|
||||||
|
|
||||||
go 1.25.5
|
go 1.25.5
|
||||||
|
|
||||||
require github.com/rs/zerolog v1.34.0
|
require (
|
||||||
|
github.com/rs/zerolog v1.34.0
|
||||||
|
github.com/BurntSushi/toml v1.6.0
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/BurntSushi/toml v1.6.0 // indirect
|
|
||||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
github.com/pkg/errors v0.9.1 // indirect
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"git.wzray.com/homelab/hivemind/internal/state"
|
"git.wzray.com/homelab/hivemind/internal/state"
|
||||||
"git.wzray.com/homelab/hivemind/internal/types"
|
"git.wzray.com/homelab/hivemind/internal/types"
|
||||||
"git.wzray.com/homelab/hivemind/internal/web/client"
|
"git.wzray.com/homelab/hivemind/internal/web/client"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Role struct {
|
type Role struct {
|
||||||
|
|
@ -36,7 +37,7 @@ func New(state *state.RuntimeState, config config.MasterConfig) *Role {
|
||||||
func (r *Role) OnStartup(ctx context.Context) error {
|
func (r *Role) OnStartup(ctx context.Context) error {
|
||||||
r.tasksGroup.Go(func() {
|
r.tasksGroup.Go(func() {
|
||||||
r.observer.Start(ctx, func(n types.Node) error {
|
r.observer.Start(ctx, func(n types.Node) error {
|
||||||
_, err := r.onLeave(n)
|
_, err := r.onLeave(n, true)
|
||||||
return err
|
return err
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
@ -50,7 +51,7 @@ func (r *Role) OnShutdown() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Role) notify(path types.Path, v any) {
|
func (r *Role) notify(path types.Path, v any) {
|
||||||
for _, n := range r.state.Registry.Nodes() {
|
for _, n := range r.state.Registry.ByRole(types.MasterRole) {
|
||||||
addr := n.Endpoint
|
addr := n.Endpoint
|
||||||
r.tasksGroup.Go(func() {
|
r.tasksGroup.Go(func() {
|
||||||
client.Post[any](addr, path, v)
|
client.Post[any](addr, path, v)
|
||||||
|
|
@ -58,39 +59,58 @@ func (r *Role) notify(path types.Path, v any) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Role) onJoin(node types.Node) (map[string]types.Node, error) {
|
func (r *Role) onJoin(node types.Node, notify bool) (map[string]types.Node, error) {
|
||||||
if err := r.state.Registry.AddNode(node); err != nil {
|
if err := r.state.Registry.AddNode(node); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
r.notify(types.PathNodeJoin, node)
|
if notify {
|
||||||
|
r.notify(types.PathMasterEventJoin, node)
|
||||||
|
}
|
||||||
|
|
||||||
return r.state.Registry.AllNodes(), nil
|
return r.state.Registry.AllNodes(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Role) onLeave(node types.Node) (bool, error) {
|
func (r *Role) onLeave(node types.Node, notify bool) (bool, error) {
|
||||||
if err := r.state.Registry.RemoveNode(node); err != nil {
|
if err := r.state.Registry.RemoveNode(node); err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
r.notify(types.PathNodeLeave, node)
|
if notify {
|
||||||
|
r.notify(types.PathMasterEventLeave, node)
|
||||||
|
}
|
||||||
|
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Role) onKeepAlive(node types.Node) (bool, error) {
|
func (r *Role) onKeepAlive(node types.Node, notify bool) (map[string]types.Node, error) {
|
||||||
r.observer.onKeepAlive(node)
|
r.observer.onKeepAlive(node)
|
||||||
|
|
||||||
if ok := r.state.Registry.Exists(node.Hostname); !ok {
|
if ok := r.state.Registry.Exists(node.Hostname); !ok {
|
||||||
_, err := r.onJoin(node)
|
// TODO: i don't like this side effect
|
||||||
return true, err
|
if _, err := r.onJoin(node, true); err != nil {
|
||||||
|
log.Warn().Err(err).Msg("unable to add node to the registry from keepalive")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false, nil
|
if notify {
|
||||||
|
r.notify(types.PathMasterEventKeepalive, node)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r.state.Registry.AllNodes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func eventFunc[R any](fn func(types.Node, bool) (R, error), notify bool) func(types.Node) (R, error) {
|
||||||
|
return func(n types.Node) (R, error) {
|
||||||
|
return fn(n, notify)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Role) RegisterHandlers(r types.Registrator) {
|
func (c *Role) RegisterHandlers(r types.Registrator) {
|
||||||
r.Register(types.PostEndpoint(types.PathMasterJoin, c.onJoin))
|
r.Register(types.PostEndpoint(types.PathMasterKeepalive, eventFunc(c.onKeepAlive, true)))
|
||||||
r.Register(types.PostEndpoint(types.PathMasterLeave, c.onLeave))
|
r.Register(types.PostEndpoint(types.PathMasterEventKeepalive, eventFunc(c.onKeepAlive, false)))
|
||||||
r.Register(types.PostEndpoint(types.PathMasterKeepalive, c.onKeepAlive))
|
r.Register(types.PostEndpoint(types.PathMasterJoin, eventFunc(c.onJoin, true)))
|
||||||
|
r.Register(types.PostEndpoint(types.PathMasterLeave, eventFunc(c.onLeave, true)))
|
||||||
|
r.Register(types.PostEndpoint(types.PathMasterEventJoin, eventFunc(c.onJoin, false)))
|
||||||
|
r.Register(types.PostEndpoint(types.PathMasterEventLeave, eventFunc(c.onLeave, false)))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,6 @@ func (r *Role) Leave() error {
|
||||||
|
|
||||||
func (r *Role) OnStartup(ctx context.Context) error {
|
func (r *Role) OnStartup(ctx context.Context) error {
|
||||||
r.keepaliveGroup.Go(r.keepaliveFunc(ctx))
|
r.keepaliveGroup.Go(r.keepaliveFunc(ctx))
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,11 +106,20 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() {
|
||||||
logger := log.With().Str("name", m.Hostname).Logger()
|
logger := log.With().Str("name", m.Hostname).Logger()
|
||||||
logger.Debug().Msg("sending keepalive packet")
|
logger.Debug().Msg("sending keepalive packet")
|
||||||
|
|
||||||
if _, err := client.Post[any](m.Endpoint, types.PathMasterKeepalive, r.state.Self); err != nil {
|
nodes, err := client.Post[map[string]types.Node](m.Endpoint, types.PathMasterKeepalive, r.state.Self)
|
||||||
|
if err != nil {
|
||||||
logger.Info().Err(err).Msg("unable to send keepalive packet")
|
logger.Info().Err(err).Msg("unable to send keepalive packet")
|
||||||
} else {
|
continue
|
||||||
logger.Debug().Msg("keepalive packet sent")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.Debug().Msg("keepalive packet sent")
|
||||||
|
|
||||||
|
if err := r.state.Registry.Set(*nodes); err != nil {
|
||||||
|
logger.Warn().Err(err).Msg("unable to set masters nodes")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -127,26 +135,10 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Role) onJoin(node types.Node) (bool, error) {
|
|
||||||
if err := r.state.Registry.AddNode(node); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *Role) onLeave(node types.Node) (bool, error) {
|
|
||||||
if err := r.state.Registry.RemoveNode(node); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func healthcheck() (string, error) {
|
func healthcheck() (string, error) {
|
||||||
return "OK", nil
|
return "OK", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *Role) RegisterHandlers(r types.Registrator) {
|
func (n *Role) RegisterHandlers(r types.Registrator) {
|
||||||
r.Register(types.GetEndpoint(types.PathNodeHealthcheck, healthcheck))
|
r.Register(types.GetEndpoint(types.PathNodeHealthcheck, healthcheck))
|
||||||
r.Register(types.PostEndpoint(types.PathNodeJoin, n.onJoin))
|
|
||||||
r.Register(types.PostEndpoint(types.PathNodeLeave, n.onLeave))
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,10 +18,11 @@ const (
|
||||||
PathMasterJoin Path = "/master/join"
|
PathMasterJoin Path = "/master/join"
|
||||||
PathMasterLeave Path = "/master/leave"
|
PathMasterLeave Path = "/master/leave"
|
||||||
PathMasterKeepalive Path = "/master/keepalive"
|
PathMasterKeepalive Path = "/master/keepalive"
|
||||||
|
PathMasterEventJoin Path = "/master/event_join"
|
||||||
|
PathMasterEventLeave Path = "/master/event_leave"
|
||||||
|
PathMasterEventKeepalive Path = "/master/event_keepalive"
|
||||||
|
|
||||||
PathNodeHealthcheck Path = "/node/healthcheck"
|
PathNodeHealthcheck Path = "/node/healthcheck"
|
||||||
PathNodeJoin Path = "/node/join"
|
|
||||||
PathNodeLeave Path = "/node/leave"
|
|
||||||
|
|
||||||
PathDnsCallback Path = "/dns/callback"
|
PathDnsCallback Path = "/dns/callback"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue