small pixel drawing of a pufferfish cascade

Make error logging more accurate
Jes Olson j3s@c3f.net
Fri, 17 Feb 2023 22:35:15 -0800
commit

0eb868d006ff3b4ea93faa8eb192d6f39b9cc814

parent

516210bc28237b018f30aab25abc2011a3f1f0aa

3 files changed, 38 insertions(+), 9 deletions(-)

jump to
M agent.go

@@ -98,16 +98,15 @@ }

 // Join asks the Serf instance to join. See the Serf.Join function.
 func (a *Agent) Join(addrs []string) (n int, err error) {
-	log.Printf("[INFO] cascade: joining: %v", addrs)
+	log.Printf("[INFO] cascade: issuing join request to '%v'", addrs)
 	// we always ignore old events because cascade don't
 	// care about the past
 	n, err = a.serf.Join(addrs, true)
-	if n > 0 {
-		log.Printf("[INFO] cascade: joined: %d nodes", n)
-	}
 	if err != nil {
-		log.Printf("[WARN] cascade: error joining: %v", err)
+		return n, fmt.Errorf("[ERR] cascade: error joining: %v\n", err)
 	}
+	// TODO: when joining fails, we don't get an error here - serf & memberlist
+	// just print to stdout and serf.Join returns without issue.
 	return n, err
 }
M main.go

@@ -24,7 +24,7 @@

 const (
 	// gracefulTimeout controls how long we wait before forcefully terminating
 	// note that this value interacts with serfConfig.LeavePropagateDelay
-	gracefulTimeout = 5 * time.Second
+	gracefulTimeout = 10 * time.Second
 )
 
 func main() {

@@ -36,10 +36,23 @@ }

 	defer agent.Shutdown()
 	// join any specified startup nodes
 	if err := startupJoin(config, agent); err != nil {
-		log.Panic(err)
+		log.Fatal(err)
+	}
+	if os.Getenv("CASCADE_DEBUG") != "" {
+		go debugPrints(agent)
 	}
 
 	if err := handleSignals(config, agent); err != nil {
 		log.Panic(err)
+	}
+}
+
+func debugPrints(agent *Agent) {
+	for {
+		for _, m := range agent.serf.Members() {
+			log.Printf("[DEBUG] cascade: name '%s' addr '%s' status '%s' \n",
+				m.Name, m.Addr, m.Status)
+		}
+		time.Sleep(time.Second * 5)
 	}
 }

@@ -114,13 +127,14 @@ if len(config.StartJoin) == 0 {

 		return nil
 	}
 
-	log.Printf("Joining cluster...")
 	n, err := agent.Join(config.StartJoin)
 	if err != nil {
 		return err
 	}
+	if n > 0 {
+		log.Printf("[INFO] cascade: join request issued to %d nodes", n)
+	}
 
-	log.Printf("Join completed. Synced with %d initial agents", n)
 	return nil
 }
A make-test-cluster

@@ -0,0 +1,16 @@

+#!/bin/sh
+#
+# make a little cascade cluster for development & debugging
+
+export CASCADE_DEBUG=true
+
+trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT
+# error case:
+# CASCADE_BIND='127.0.0.1' go run . &
+# sleep 5
+# CASCADE_BIND='127.0.0.2' CASCADE_JOIN='127.0.0.1' go run . &
+# happy case:
+CASCADE_NAME=foo CASCADE_BIND='127.0.0.1' go run . &
+sleep 5
+CASCADE_NAME=bar CASCADE_BIND='127.0.0.2' CASCADE_JOIN='127.0.0.1' go run . &
+wait