// internal/agent/services.go

package agent
import (
"encoding/json"
"fmt"
"sort"
"sync"
"j3s.sh/cascade/api"
"go.etcd.io/bbolt"
)
// serfRegisterEvent and serfDeregisterEvent are the user event names we
// gossip when an agent's owned service set changes. Both share the
// "cascade:" prefix; the suffix distinguishes an added service from a
// removed one.
const (
// serfRegisterEvent is the event name used when a service is registered.
serfRegisterEvent = "cascade:svc-register"
// serfDeregisterEvent is the event name used when a service is removed.
serfDeregisterEvent = "cascade:svc-deregister"
)
// servicesBucket is the name of the bbolt bucket holding this agent's owned
// services. Entries are keyed by service ID and hold the JSON encoding of
// the service.
var servicesBucket = []byte("services")
// registerPayload is the wire format for svc-register events. It is
// encoded as a JSON object with "node" and "service" keys.
type registerPayload struct {
// Node is the node name carried with the event (presumably the owner of
// Service; confirm against the sender).
Node string `json:"node"`
// Service is the service being announced. It is a pointer, so a JSON null
// or an absent key decodes to nil.
Service *api.AgentService `json:"service"`
}
// deregisterPayload is the wire format for svc-deregister events. It is
// encoded as a JSON object with "node" and "id" keys.
type deregisterPayload struct {
// Node is the node name carried with the event (presumably the owner of
// the removed service; confirm against the sender).
Node string `json:"node"`
// ID is the ID of the service being removed.
ID string `json:"id"`
}
// ServiceStore tracks services known to this agent — both services this
// agent owns (which are persisted) and services owned by peers (which
// are kept in memory only and refreshed via gossip).
//
// Owned and peer services live in the same in-memory map; owned services
// are the entries filed under nodeName.
type ServiceStore struct {
// mu guards services.
mu sync.RWMutex
// nodeName is this agent's node name; it keys the owned entries in
// services.
nodeName string
// db is the bbolt handle backing owned services. Close sets it to nil.
db *bbolt.DB
// services indexed by node name then service ID.
services map[string]map[string]*api.AgentService
}
// NewServiceStore opens the bbolt database at dbPath (mode 0o600) and loads
// every service previously persisted there into memory, filed under
// nodeName.
//
// The services bucket is created if it does not exist yet. If loading
// fails — for example because a persisted entry is not valid JSON — the
// database handle is closed before returning, so a non-nil error never
// leaves the file open. Both failure paths wrap the underlying error with
// dbPath so callers can tell which store was involved.
//
// NOTE(review): bbolt.Open is called with nil options; per the bbolt docs
// a zero Timeout waits indefinitely for the file lock, so a second process
// using the same dbPath would block here — confirm that is intended.
func NewServiceStore(nodeName, dbPath string) (*ServiceStore, error) {
	db, err := bbolt.Open(dbPath, 0o600, nil)
	if err != nil {
		return nil, fmt.Errorf("open service db %q: %w", dbPath, err)
	}
	s := &ServiceStore{
		nodeName: nodeName,
		db:       db,
		services: map[string]map[string]*api.AgentService{},
	}
	if err := db.Update(func(tx *bbolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(servicesBucket)
		if err != nil {
			return err
		}
		return b.ForEach(func(k, v []byte) error {
			var svc api.AgentService
			if err := json.Unmarshal(v, &svc); err != nil {
				return fmt.Errorf("decode persisted service %q: %w", string(k), err)
			}
			// No lock needed: s has not been published to any other
			// goroutine yet.
			s.putLocked(nodeName, &svc)
			return nil
		})
	}); err != nil {
		// Best-effort close; the load failure is the error worth reporting.
		_ = db.Close()
		return nil, fmt.Errorf("load service db %q: %w", dbPath, err)
	}
	return s, nil
}
// Close releases the on-disk database handle and clears it from the store.
// Calling Close again afterwards is a no-op that returns nil. The error, if
// any, is whatever closing the database reported.
func (s *ServiceStore) Close() error {
	handle := s.db
	if handle == nil {
		// Already closed (or never opened).
		return nil
	}
	closeErr := handle.Close()
	s.db = nil
	return closeErr
}
// Register adds or replaces a service owned by this agent. The service is
// persisted to disk before the in-memory view is updated and before
// Register returns, so a crash after the response can't lose the write.
//
// svc must be non-nil and carry a non-empty ID and Service name. The store
// keeps its own copy of *svc (a struct copy), so the caller may reuse or
// modify the struct's fields afterwards without affecting the store.
//
// An error is returned if validation fails, if the store has been closed,
// if svc cannot be JSON-encoded, or if the database write fails; in all of
// those cases the in-memory view is left untouched.
func (s *ServiceStore) Register(svc *api.AgentService) error {
	if svc == nil {
		return fmt.Errorf("service required")
	}
	if svc.ID == "" {
		return fmt.Errorf("service ID required")
	}
	if svc.Service == "" {
		return fmt.Errorf("service name required")
	}
	// Close sets db to nil; fail cleanly instead of dereferencing it.
	if s.db == nil {
		return fmt.Errorf("service store closed")
	}

	// Snapshot the caller's struct so what we persist and what we keep in
	// memory are the same value, regardless of later caller mutations.
	owned := *svc

	buf, err := json.Marshal(&owned)
	if err != nil {
		return fmt.Errorf("encode service %q: %w", owned.ID, err)
	}
	if err := s.db.Update(func(tx *bbolt.Tx) error {
		return tx.Bucket(servicesBucket).Put([]byte(owned.ID), buf)
	}); err != nil {
		return fmt.Errorf("persist service %q: %w", owned.ID, err)
	}

	s.mu.Lock()
	s.putLocked(s.nodeName, &owned)
	s.mu.Unlock()
	return nil
}
// Deregister removes a service owned by this agent, first from disk and
// then from the in-memory view.
//
// An error is returned if the store has been closed or if the database
// delete fails; in either case the in-memory view is left untouched.
// Removing an ID that is not present in memory is a no-op there.
func (s *ServiceStore) Deregister(id string) error {
	// Close sets db to nil; fail cleanly instead of dereferencing it.
	if s.db == nil {
		return fmt.Errorf("service store closed")
	}
	if err := s.db.Update(func(tx *bbolt.Tx) error {
		return tx.Bucket(servicesBucket).Delete([]byte(id))
	}); err != nil {
		return fmt.Errorf("delete persisted service %q: %w", id, err)
	}

	s.mu.Lock()
	s.deleteLocked(s.nodeName, id)
	s.mu.Unlock()
	return nil
}
// ApplyRegister handles a gossipped service registration from a peer and
// records it in memory only (peer services are never persisted).
//
// It is a no-op if node is this agent's own name (the event is one we
// originated, so the service is already in our store) or if svc is nil,
// which can happen when a peer's payload carries a null service. The store
// keeps its own struct copy of *svc.
func (s *ServiceStore) ApplyRegister(node string, svc *api.AgentService) {
	if node == s.nodeName {
		return
	}
	// A malformed event must not crash the agent.
	if svc == nil {
		return
	}
	peerSvc := *svc

	s.mu.Lock()
	s.putLocked(node, &peerSvc)
	s.mu.Unlock()
}
// ApplyDeregister applies a service removal announced by a peer over
// gossip. Events naming this agent's own node are ignored; for any other
// node the service with the given id is dropped from the in-memory view.
func (s *ServiceStore) ApplyDeregister(node, id string) {
	if node != s.nodeName {
		s.mu.Lock()
		defer s.mu.Unlock()
		s.deleteLocked(node, id)
	}
}
// DropNode discards every in-memory service filed under the given node.
// It is meant for peers that have left or been declared failed; a request
// to drop this agent's own node is ignored.
func (s *ServiceStore) DropNode(node string) {
	if self := s.nodeName; node == self {
		return
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.services, node)
}
// Owned returns the services this agent owns, keyed by service ID. The map
// is freshly allocated and each value is a struct copy of the stored
// service, so callers may modify the result. The result is never nil.
func (s *ServiceStore) Owned() map[string]*api.AgentService {
	s.mu.RLock()
	defer s.mu.RUnlock()

	mine := s.services[s.nodeName]
	snapshot := map[string]*api.AgentService{}
	for serviceID := range mine {
		dup := *mine[serviceID]
		snapshot[serviceID] = &dup
	}
	return snapshot
}
// NodeService pairs a service with the node that owns it.
type NodeService struct {
// Node is the name of the node the service is filed under.
Node string
// Service is the service instance itself.
Service *api.AgentService
}
// AllServices returns, for every service name known to the store (on any
// node, including this one), the sorted union of tags seen across all of
// its instances. A service whose instances carry no tags maps to an empty,
// non-nil slice.
func (s *ServiceStore) AllServices() map[string][]string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// First pass: collect a tag set per service name.
	seen := map[string]map[string]struct{}{}
	for _, perNode := range s.services {
		for _, inst := range perNode {
			if _, known := seen[inst.Service]; !known {
				seen[inst.Service] = map[string]struct{}{}
			}
			for _, tag := range inst.Tags {
				seen[inst.Service][tag] = struct{}{}
			}
		}
	}

	// Second pass: flatten each set into a sorted slice.
	result := make(map[string][]string, len(seen))
	for svcName, tagSet := range seen {
		sorted := make([]string, 0, len(tagSet))
		for tag := range tagSet {
			sorted = append(sorted, tag)
		}
		sort.Strings(sorted)
		result[svcName] = sorted
	}
	return result
}
// AllInstances returns every service instance held in the store — this
// agent's own as well as those learned from peers — as one entry per
// (node, service ID) pair. Entries are struct copies, ordered by node name
// and then by service ID. The result is nil when the store is empty.
func (s *ServiceStore) AllInstances() []NodeService {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var instances []NodeService
	for nodeName, perNode := range s.services {
		for _, stored := range perNode {
			dup := *stored
			instances = append(instances, NodeService{Node: nodeName, Service: &dup})
		}
	}

	sort.Slice(instances, func(i, j int) bool {
		a, b := instances[i], instances[j]
		if a.Node == b.Node {
			return a.Service.ID < b.Service.ID
		}
		return a.Node < b.Node
	})
	return instances
}
// ServiceInstances returns every instance of the named service held in the
// store, across all nodes including this one. Entries are struct copies,
// ordered by node name and then by service ID. The result is nil when no
// instance matches.
func (s *ServiceStore) ServiceInstances(name string) []NodeService {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var matches []NodeService
	for nodeName, perNode := range s.services {
		for _, stored := range perNode {
			if stored.Service == name {
				dup := *stored
				matches = append(matches, NodeService{Node: nodeName, Service: &dup})
			}
		}
	}

	sort.Slice(matches, func(i, j int) bool {
		a, b := matches[i], matches[j]
		if a.Node == b.Node {
			return a.Service.ID < b.Service.ID
		}
		return a.Node < b.Node
	})
	return matches
}
// NodeServices returns the services filed under the named node, keyed by
// service ID. The map is freshly allocated with struct copies as values;
// an unknown node yields an empty, non-nil map.
func (s *ServiceStore) NodeServices(node string) map[string]*api.AgentService {
	s.mu.RLock()
	defer s.mu.RUnlock()

	perNode := s.services[node]
	snapshot := map[string]*api.AgentService{}
	for serviceID := range perNode {
		dup := *perNode[serviceID]
		snapshot[serviceID] = &dup
	}
	return snapshot
}
// Nodes returns the sorted names of all nodes that currently have at least
// one service in the store. This agent's own node is included when it owns
// any service. The result is empty but non-nil when the store is empty.
func (s *ServiceStore) Nodes() []string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	names := make([]string, len(s.services))
	next := 0
	for nodeName := range s.services {
		names[next] = nodeName
		next++
	}
	sort.Strings(names)
	return names
}
// putLocked files svc under node, keyed by svc.ID, creating the node's
// inner map on first use and replacing any existing entry with the same
// ID. The caller must hold s.mu for writing.
func (s *ServiceStore) putLocked(node string, svc *api.AgentService) {
	perNode := s.services[node]
	if perNode == nil {
		perNode = map[string]*api.AgentService{}
		s.services[node] = perNode
	}
	perNode[svc.ID] = svc
}
// deleteLocked removes the service with the given id from node's entries
// and drops the node itself once its last service is gone. Unknown nodes
// and unknown IDs are ignored. The caller must hold s.mu for writing.
func (s *ServiceStore) deleteLocked(node, id string) {
	perNode, known := s.services[node]
	if !known {
		return
	}
	delete(perNode, id)
	if len(perNode) > 0 {
		return
	}
	// Keep the outer map free of empty node entries.
	delete(s.services, node)
}