small pixel drawing of a pufferfish cascade

fix runaway cpu bug, simplify setupAgent
Jes Olson j3s@c3f.net
Fri, 17 Feb 2023 21:48:33 -0800
commit

5d051cf691b511cb36c4f748b5dacf329b97249c

parent

c816221ade03a4019d605c91950cd81241aa7c7d

5 files changed, 35 insertions(+), 29 deletions(-)

jump to
M .gitignore

@@ -1,1 +1,2 @@

cascade +*.prof
M README

@@ -4,3 +4,9 @@

 cascade is a cli-based fully masterless distributed computing thing
+
+development
+-----------------------
+
+CASCADE_NAME=hewo CASCADE_BIND='127.0.0.1' go run .
+CASCADE_NAME=hai CASCADE_BIND='127.0.0.2' CASCADE_JOIN='127.0.0.1' go run .
M agent.go

@@ -8,17 +8,10 @@

"github.com/hashicorp/serf/serf" ) -// Agent starts and manages a Serf & adds -// service discovery +// Agent starts and manages a Serf & adds service discovery (TODO) type Agent struct { -// conf *Config serfConf *serf.Config eventCh chan serf.Event - - // i doubt we care about handling events, - // but i'm leaving this here just in case - // eventHandlerList []EventHandler - // eventHandlersLock sync.Mutex // This is the underlying Serf we are wrapping serf *serf.Serf

@@ -53,7 +46,7 @@ case e := <-a.eventCh:

log.Printf("[INFO] cascade: Received event: %s", e.String()) case <-serfShutdownCh: - log.Printf("[WARN] cascade: Serf shutdown detected, quitting") + log.Printf("[INFO] cascade: serf shutdown detected, quitting eventloop") a.Shutdown() return
M config.go

@@ -3,13 +3,20 @@

 import (
 	"fmt"
 	"net"
+	"os"
 )

 const DefaultBindPort int = 4449

 func DefaultConfig() *Config {
+	hostname, err := os.Hostname()
+	if err != nil {
+		panic(err)
+	}
+
 	return &Config{
 		BindAddr: "0.0.0.0",
+		NodeName: hostname,
 	}
 }
M main.go

@@ -66,8 +66,7 @@ }

// Bail fast if not doing a graceful leave if !graceful { - log.Printf("[WARN] cascade: non-graceful leave detected") - return nil + log.Fatal("[WARN] cascade: non-graceful leave detected") } // Attempt a graceful leave

@@ -129,31 +128,31 @@ bindIP, bindPort, err := config.AddrParts(config.BindAddr)

if err != nil { log.Panic(err) } + serfConfig := serf.DefaultConfig() serfConfig.NodeName = config.NodeName serfConfig.ProtocolVersion = uint8(serf.ProtocolVersionMax) - serfConfig.CoalescePeriod = 3 * time.Second - serfConfig.QuiescentPeriod = time.Second - serfConfig.QueryResponseSizeLimit = 1024 - serfConfig.QuerySizeLimit = 1024 - serfConfig.UserEventSizeLimit = 512 - serfConfig.UserCoalescePeriod = 3 * time.Second - serfConfig.UserQuiescentPeriod = time.Second - // TODO: look at reconnect/tombstone settings w more scrutiny - serfConfig.ReconnectInterval = 0 - serfConfig.ReconnectTimeout = 0 - serfConfig.TombstoneTimeout = 0 - serfConfig.BroadcastTimeout = 0 - // TODO: what are the implications of true here o_O - serfConfig.EnableNameConflictResolution = true - // hardcode DefaultWANConfig because cascade is designed to be - // used as a single global system. + // TODO: how should cascade handle name conflicts? + // defaulting to just blowing up for now, but + // we _could_ take the tailscale route & append + // -1 or whatever to the node. that would be more user friendly. + // TODO: some of these serf settings were pulled + // from consul[1]. re-examine them eventually. + serfConfig.EnableNameConflictResolution = false + serfConfig.ReconnectTimeout = 3 * 24 * time.Hour + serfConfig.QueueDepthWarning = 1000000 + serfConfig.MinQueueDepth = 4096 + serfConfig.LeavePropagateDelay = 3 * time.Second + serfConfig.MemberlistConfig = memberlist.DefaultWANConfig() + serfConfig.MemberlistConfig.DeadNodeReclaimTime = 30 * time.Second serfConfig.MemberlistConfig.BindAddr = bindIP serfConfig.MemberlistConfig.BindPort = bindPort - serfConfig.MemberlistConfig.AdvertiseAddr = "" - serfConfig.MemberlistConfig.AdvertisePort = 0 agent := Create(serfConfig) return agent } + +// [1]: sources for consul serf tweaks +// https://github.com/hashicorp/consul/blob/v1.14.4/agent/consul/config.go +// https://github.com/hashicorp/consul/blob/v1.14.4/lib/serf/serf.go