diff --git a/TODO.md b/TODO.md index 47c0fea..68b1a09 100644 --- a/TODO.md +++ b/TODO.md @@ -1,3 +1,56 @@ +# Background +Some background first: +the node can have multiple roles +this includes (but is not limited to) +* Host (can generate events) +* DNS (can consume the events and act on them) +* Something else that I might come up with (the architecture has to be expandable) + +# Control plane (3+ nodes) +* Quorum + * Consists of $\lfloor n / 2 \rfloor + 1$ nodes + * Cluster is considered "degraded" if no quorum can be created +* Stores an event log + * **Only** leader can append to the log (with quorum permission) +* Membership authority + * No joins without quorum approval + * Leaves are not propagated without quorum +* Manages epoch (useful for GC) + * Node $N$ with $N.epoch != cluster.epoch$ can **not** join the cluster, and has to re-join (bootstrap) +* Can (but doesn't have to) be a bootstrap point + +# Membership +* Membership is managed through SWIM +* Each node contains a small slice of the entire network +## Joining +Each node has an array of roles: +1. That it performs +2. That it requires to operate (can be moved out to the master, or the shared type) +3. That it needs for bootstrapping (analogous to 2.) + +Node can join via a master or via other nodes +When a node requests to join, the responder makes a request to the CP and asks for permission to add this node +* If master allows + 1. The node gets a membership digest from the CP. + 2. The node *can* be brought up to speed using its neighbors from 1. + 3. Node join event gets broadcast over SWIM gossip +* Otherwise, nothing happens + +# Host node +## Bootstrap +Host node requests `dns` nodes on join (and other node types, such as `ns`, `nginx`, etc... They should really be called something like `dns_processor`, and the internals (how it processes the dns) should not be visible to the cluster, but that's a task for a future me) +When a new update occurs, it sends the update to *some* `dns` hosts. 
+ +# DNS node +## Bootstrap +First, it gets all the available `hosts` from the CP +Then it requests their configs and sets map[hostName]seq accordingly +## Simple join (when other nodes exist) +It requests its config from other nodes and that's it + + + +# Minor To-Do - auth middleware lol - move request logging out of the request handling into a middleware - nginx role diff --git a/go.mod b/go.mod index eceeb1b..53b5b0a 100644 --- a/go.mod +++ b/go.mod @@ -2,10 +2,12 @@ module git.wzray.com/homelab/hivemind go 1.25.5 -require github.com/rs/zerolog v1.34.0 +require ( + github.com/rs/zerolog v1.34.0 + github.com/BurntSushi/toml v1.6.0 +) require ( - github.com/BurntSushi/toml v1.6.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/pkg/errors v0.9.1 // indirect diff --git a/internal/roles/master/master.go b/internal/roles/master/master.go index dee25e3..9987c69 100644 --- a/internal/roles/master/master.go +++ b/internal/roles/master/master.go @@ -9,6 +9,7 @@ import ( "git.wzray.com/homelab/hivemind/internal/state" "git.wzray.com/homelab/hivemind/internal/types" "git.wzray.com/homelab/hivemind/internal/web/client" + "github.com/rs/zerolog/log" ) type Role struct { @@ -36,7 +37,7 @@ func New(state *state.RuntimeState, config config.MasterConfig) *Role { func (r *Role) OnStartup(ctx context.Context) error { r.tasksGroup.Go(func() { r.observer.Start(ctx, func(n types.Node) error { - _, err := r.onLeave(n) + _, err := r.onLeave(n, true) return err }) }) @@ -50,7 +51,7 @@ func (r *Role) OnShutdown() error { } func (r *Role) notify(path types.Path, v any) { - for _, n := range r.state.Registry.Nodes() { + for _, n := range r.state.Registry.ByRole(types.MasterRole) { addr := n.Endpoint r.tasksGroup.Go(func() { client.Post[any](addr, path, v) @@ -58,39 +59,58 @@ func (r *Role) notify(path types.Path, v any) { } } -func (r *Role) onJoin(node types.Node) (map[string]types.Node, error) { +func (r *Role) 
onJoin(node types.Node, notify bool) (map[string]types.Node, error) { if err := r.state.Registry.AddNode(node); err != nil { return nil, err } - r.notify(types.PathNodeJoin, node) + if notify { + r.notify(types.PathMasterEventJoin, node) + } return r.state.Registry.AllNodes(), nil } -func (r *Role) onLeave(node types.Node) (bool, error) { +func (r *Role) onLeave(node types.Node, notify bool) (bool, error) { if err := r.state.Registry.RemoveNode(node); err != nil { return false, err } - r.notify(types.PathNodeLeave, node) + if notify { + r.notify(types.PathMasterEventLeave, node) + } return true, nil } -func (r *Role) onKeepAlive(node types.Node) (bool, error) { +func (r *Role) onKeepAlive(node types.Node, notify bool) (map[string]types.Node, error) { r.observer.onKeepAlive(node) if ok := r.state.Registry.Exists(node.Hostname); !ok { - _, err := r.onJoin(node) - return true, err + // TODO: i don't like this side effect + if _, err := r.onJoin(node, true); err != nil { + log.Warn().Err(err).Msg("unable to add node to the registry from keepalive") + } } - return false, nil + if notify { + r.notify(types.PathMasterEventKeepalive, node) + } + + return r.state.Registry.AllNodes(), nil +} + +func eventFunc[R any](fn func(types.Node, bool) (R, error), notify bool) func(types.Node) (R, error) { + return func(n types.Node) (R, error) { + return fn(n, notify) + } } func (c *Role) RegisterHandlers(r types.Registrator) { - r.Register(types.PostEndpoint(types.PathMasterJoin, c.onJoin)) - r.Register(types.PostEndpoint(types.PathMasterLeave, c.onLeave)) - r.Register(types.PostEndpoint(types.PathMasterKeepalive, c.onKeepAlive)) + r.Register(types.PostEndpoint(types.PathMasterKeepalive, eventFunc(c.onKeepAlive, true))) + r.Register(types.PostEndpoint(types.PathMasterEventKeepalive, eventFunc(c.onKeepAlive, false))) + r.Register(types.PostEndpoint(types.PathMasterJoin, eventFunc(c.onJoin, true))) + r.Register(types.PostEndpoint(types.PathMasterLeave, eventFunc(c.onLeave, true))) + 
r.Register(types.PostEndpoint(types.PathMasterEventJoin, eventFunc(c.onJoin, false))) + r.Register(types.PostEndpoint(types.PathMasterEventLeave, eventFunc(c.onLeave, false))) } diff --git a/internal/roles/node/node.go b/internal/roles/node/node.go index a09355b..0a04fe8 100644 --- a/internal/roles/node/node.go +++ b/internal/roles/node/node.go @@ -92,7 +92,6 @@ func (r *Role) Leave() error { func (r *Role) OnStartup(ctx context.Context) error { r.keepaliveGroup.Go(r.keepaliveFunc(ctx)) - return nil } @@ -107,11 +106,20 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() { logger := log.With().Str("name", m.Hostname).Logger() logger.Debug().Msg("sending keepalive packet") - if _, err := client.Post[any](m.Endpoint, types.PathMasterKeepalive, r.state.Self); err != nil { + nodes, err := client.Post[map[string]types.Node](m.Endpoint, types.PathMasterKeepalive, r.state.Self) + if err != nil { logger.Info().Err(err).Msg("unable to send keepalive packet") - } else { - logger.Debug().Msg("keepalive packet sent") + continue } + + logger.Debug().Msg("keepalive packet sent") + + if err := r.state.Registry.Set(*nodes); err != nil { + logger.Warn().Err(err).Msg("unable to set masters nodes") + continue + } + + break } } @@ -127,26 +135,10 @@ func (r *Role) keepaliveFunc(ctx context.Context) func() { } } -func (r *Role) onJoin(node types.Node) (bool, error) { - if err := r.state.Registry.AddNode(node); err != nil { - return false, err - } - return true, nil -} - -func (r *Role) onLeave(node types.Node) (bool, error) { - if err := r.state.Registry.RemoveNode(node); err != nil { - return false, err - } - return true, nil -} - func healthcheck() (string, error) { return "OK", nil } func (n *Role) RegisterHandlers(r types.Registrator) { r.Register(types.GetEndpoint(types.PathNodeHealthcheck, healthcheck)) - r.Register(types.PostEndpoint(types.PathNodeJoin, n.onJoin)) - r.Register(types.PostEndpoint(types.PathNodeLeave, n.onLeave)) } diff --git a/internal/types/web.go 
b/internal/types/web.go index 1416d6a..97d86f2 100644 --- a/internal/types/web.go +++ b/internal/types/web.go @@ -15,13 +15,14 @@ func (p Path) String() string { } const ( - PathMasterJoin Path = "/master/join" - PathMasterLeave Path = "/master/leave" - PathMasterKeepalive Path = "/master/keepalive" + PathMasterJoin Path = "/master/join" + PathMasterLeave Path = "/master/leave" + PathMasterKeepalive Path = "/master/keepalive" + PathMasterEventJoin Path = "/master/event_join" + PathMasterEventLeave Path = "/master/event_leave" + PathMasterEventKeepalive Path = "/master/event_keepalive" PathNodeHealthcheck Path = "/node/healthcheck" - PathNodeJoin Path = "/node/join" - PathNodeLeave Path = "/node/leave" PathDnsCallback Path = "/dns/callback"