Skip to content

Commit dbee56d

Browse files
committed
fingerprint: Add retry and failure config to env fingerprinters.
This change introduces new optional client fingerprinter configuration fields which can be used to control how the env fingerprinters perform retries and whether errors should halt the agent startup. The retry wrapper is used by the env_aws, env_azure, env_gce, and env_digitalocean fingerprinters and is the handler for retry and error logic on the main fingerprinter. The change is backwards compatible, so running this change without any new config options results in the same behaviour as previously. - retry_interval: Specifies the time to wait between fingerprint attempts. This will default to 2 seconds. - retry_attempts: Specifies the maximum number of fingerprint retries to be made. This will default to 0 and can be set to -1 if the operator wants infinite retries. - exit_on_failure: Determines how the agent handles failure in performing the fingerprint. The change helps alleviate problems in cloud providers where a machine starts before the metadata service and endpoint are available. In this situation, Nomad times out the fingerprinter quickly and marks it as skipped, thus assuming we are not running within that environment. Operators can use the new configuration options to handle these race conditions, and wait for the metadata service to be available and respond.
1 parent fe53729 commit dbee56d

22 files changed

+1572
-196
lines changed

client/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,10 @@ type Config struct {
395395

396396
// LogFile is used by MonitorExport to stream a server's log file
397397
LogFile string `hcl:"log_file"`
398+
399+
// Fingerprinters is a map of fingerprinter configurations by name. This
400+
// currently only applies to env fingerprinters such as "env_aws".
401+
Fingerprinters map[string]*Fingerprint
398402
}
399403

400404
type APIListenerRegistrar interface {
@@ -931,6 +935,7 @@ func DefaultConfig() *Config {
931935
MinDynamicUser: 80_000,
932936
MaxDynamicUser: 89_999,
933937
},
938+
Fingerprinters: map[string]*Fingerprint{},
934939
}
935940

936941
return cfg

client/config/fingerprint.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package config
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"slices"
7+
"strings"
8+
"time"
9+
)
10+
11+
// validEnvFingerprinters contains the fingerprinters that are valid
12+
// environment fingerprinters and is used for input validation.
// Fingerprint.Validate rejects any block whose Name is not in this list.
13+
var validEnvFingerprinters = []string{
14+
"env_aws",
15+
"env_azure",
16+
"env_gce",
17+
"env_digitalocean",
18+
}
19+
20+
// Fingerprint is an optional configuration block for environment fingerprinters
21+
// that controls retry behavior and failure handling.
22+
type Fingerprint struct {
23+
24+
// Name is the fingerprinter identifier that this configuration block
25+
// relates to. It is gathered from the HCL block label.
26+
Name string `hcl:",key"`
27+
28+
// RetryInterval specifies the time to wait between fingerprint
29+
// attempts.
// NOTE(review): RetryIntervalHCL holds the raw "retry_interval" string from
// HCL; presumably it is parsed into RetryInterval by the config loader,
// which is not part of this file — confirm.
30+
RetryInterval time.Duration
31+
RetryIntervalHCL string `hcl:"retry_interval,optional"`
32+
33+
// RetryAttempts specifies the maximum number of fingerprint attempts to be
34+
// made before the failure is considered terminal. Validate rejects values
// lower than -1.
35+
RetryAttempts int `hcl:"retry_attempts,optional"`
36+
37+
// ExitOnFailure indicates whether the fingerprinter should cause the agent
38+
// to exit if it fails to correctly perform its fingerprint run. This is
39+
// useful if the fingerprinter provides critical information used by Nomad
40+
// workloads. It is a pointer so that an unset value (nil) can be told apart
// from an explicit false; Merge only overrides it when it is non-nil.
41+
ExitOnFailure *bool `hcl:"exit_on_failure,optional"`
42+
43+
// ExtraKeysHCL is used by hcl to surface unexpected keys. Validate returns
// an error when it is non-empty.
44+
ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
45+
}
46+
47+
// Copy is used to satisfy to helper.Copyable interface, so we can perform
48+
// copies of the fingerprint config slice.
49+
func (f *Fingerprint) Copy() *Fingerprint {
50+
if f == nil {
51+
return nil
52+
}
53+
54+
c := new(Fingerprint)
55+
*c = *f
56+
return c
57+
}
58+
59+
// Merge is used to combine two fingerprint blocks with the block passed into
60+
// the function taking precedence. The name is not overwritten as this is
61+
// expected to match as it's the block label. It is the callers responsibility
62+
// to ensure the two fingerprint blocks are for the same fingerprinter
63+
// implementation.
64+
func (f *Fingerprint) Merge(z *Fingerprint) *Fingerprint {
65+
if f == nil {
66+
return z
67+
}
68+
69+
result := *f
70+
71+
if z == nil {
72+
return &result
73+
}
74+
75+
if z.RetryInterval != 0 {
76+
result.RetryInterval = z.RetryInterval
77+
}
78+
if z.RetryIntervalHCL != "" {
79+
result.RetryIntervalHCL = z.RetryIntervalHCL
80+
}
81+
if z.RetryAttempts != 0 {
82+
result.RetryAttempts = z.RetryAttempts
83+
}
84+
if z.ExitOnFailure != nil {
85+
result.ExitOnFailure = z.ExitOnFailure
86+
}
87+
88+
return &result
89+
}
90+
91+
// Validate the fingerprint block to ensure we do not have any values that
92+
// cannot be handled.
93+
func (f *Fingerprint) Validate() error {
94+
95+
if f == nil {
96+
return nil
97+
}
98+
99+
if f.Name == "" {
100+
return errors.New("fingerprint name cannot be empty")
101+
}
102+
if !slices.Contains(validEnvFingerprinters, f.Name) {
103+
return fmt.Errorf("fingerprint %q does not support configuration", f.Name)
104+
}
105+
if f.RetryInterval < 0 {
106+
return fmt.Errorf("fingerprint %q retry interval cannot be negative", f.Name)
107+
}
108+
if f.RetryAttempts < -1 {
109+
return fmt.Errorf("fingerprint %q retry attempts cannot be less than -1", f.Name)
110+
}
111+
if len(f.ExtraKeysHCL) > 0 {
112+
return fmt.Errorf("fingerprint %q contains unknown configuration options: %s",
113+
f.Name, strings.Join(f.ExtraKeysHCL, ","))
114+
}
115+
116+
return nil
117+
}

0 commit comments

Comments
 (0)