Skip to content

Commit f993c71

Browse files
committed
fingerprint: Add retry and failure config to env fingerprinters.
This change introduces new optional client fingerprinter configuration fields which can be used to control how the env fingerprinters perform retries and whether errors should halt the agent startup. The retry wrapper is used by the env_aws, env_azure, env_gce, and env_digitalocean fingerprinters and is the handler for retry and error logic on the main fingerprinter. The change is backwards compatible, so running this change without any new config options results in the same behaviour as previously. - retry_interval: Specifies the time to wait between fingerprint attempts. This will default to 2 seconds. - retry_attempts: Specifies the maximum number of fingerprint retries to be made. This will default to 0 and can be set to -1 if the operator wants infinite retries. - exit_on_failure: Determines how the agent handles failure in performing the fingerprint. The change helps alleviate problems in cloud providers where a machine starts before the metadata service and endpoint are available. In this situation, Nomad times out the fingerprinter quickly and marks it as skipped, thus assuming we are not running within that environment. Operators can use the new configuration options to handle these race conditions, and wait for the metadata service to be available and respond.
1 parent fe53729 commit f993c71

22 files changed

+1587
-196
lines changed

client/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,10 @@ type Config struct {
395395

396396
// LogFile is used by MonitorExport to stream a server's log file
397397
LogFile string `hcl:"log_file"`
398+
399+
// Fingerprinters is a map of fingerprinter configurations by name. This
400+
// currently only applies to env fingerprinters such as "env_aws".
401+
Fingerprinters map[string]*Fingerprint
398402
}
399403

400404
type APIListenerRegistrar interface {
@@ -931,6 +935,7 @@ func DefaultConfig() *Config {
931935
MinDynamicUser: 80_000,
932936
MaxDynamicUser: 89_999,
933937
},
938+
Fingerprinters: map[string]*Fingerprint{},
934939
}
935940

936941
return cfg

client/config/fingerprint.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Copyright (c) HashiCorp, Inc.
2+
// SPDX-License-Identifier: BUSL-1.1
3+
4+
package config
5+
6+
import (
7+
"errors"
8+
"fmt"
9+
"slices"
10+
"strings"
11+
"time"
12+
)
13+
14+
// validEnvFingerprinters lists the names of the environment fingerprinters
// that accept a configuration block. Validate checks block names against it.
var validEnvFingerprinters = []string{"env_aws", "env_azure", "env_gce", "env_digitalocean"}
22+
23+
// Fingerprint is an optional configuration block for environment
// fingerprinters that controls retry behavior and failure handling.
type Fingerprint struct {

	// Name is the fingerprinter identifier that this configuration block
	// relates to. It is gathered from the HCL block label.
	Name string `hcl:",key"`

	// RetryInterval specifies the time to wait between fingerprint attempts.
	// RetryIntervalHCL carries the string value of the "retry_interval" HCL
	// attribute; converting it into RetryInterval is not done by the methods
	// of this type.
	RetryInterval    time.Duration
	RetryIntervalHCL string `hcl:"retry_interval,optional"`

	// RetryAttempts specifies the maximum number of fingerprint attempts to be
	// made before the failure is considered terminal. Validate rejects values
	// below -1.
	RetryAttempts int `hcl:"retry_attempts,optional"`

	// ExitOnFailure indicates whether the fingerprinter should cause the agent
	// to exit if it fails to correctly perform its fingerprint run. This is
	// useful if the fingerprinter provides critical information used by Nomad
	// workloads. It is a pointer so that an unset value (nil) can be told
	// apart from an explicit false.
	ExitOnFailure *bool `hcl:"exit_on_failure,optional"`

	// ExtraKeysHCL is used by hcl to surface unexpected keys.
	ExtraKeysHCL []string `hcl:",unusedKeys" json:"-"`
}

// Copy is used to satisfy the helper.Copyable interface, so we can perform
// copies of the fingerprint config slice. It returns nil for a nil receiver.
//
// The copy is deep: the ExitOnFailure pointer and the ExtraKeysHCL slice are
// duplicated, so writes through the copy never reach the original.
func (f *Fingerprint) Copy() *Fingerprint {
	if f == nil {
		return nil
	}

	c := *f

	// A plain struct assignment would leave both values pointing at the same
	// bool, so allocate a fresh one for the copy.
	if f.ExitOnFailure != nil {
		exit := *f.ExitOnFailure
		c.ExitOnFailure = &exit
	}

	// slices.Clone gives the copy its own backing array.
	c.ExtraKeysHCL = slices.Clone(f.ExtraKeysHCL)

	return &c
}
61+
62+
// Merge is used to combine two fingerprint blocks with the block passed into
63+
// the function taking precedence. The name is not overwritten as this is
64+
// expected to match as it's the block label. It is the callers responsibility
65+
// to ensure the two fingerprint blocks are for the same fingerprinter
66+
// implementation.
67+
func (f *Fingerprint) Merge(z *Fingerprint) *Fingerprint {
68+
if f == nil {
69+
return z
70+
}
71+
72+
result := *f
73+
74+
if z == nil {
75+
return &result
76+
}
77+
78+
if z.RetryInterval != 0 {
79+
result.RetryInterval = z.RetryInterval
80+
}
81+
if z.RetryIntervalHCL != "" {
82+
result.RetryIntervalHCL = z.RetryIntervalHCL
83+
}
84+
if z.RetryAttempts != 0 {
85+
result.RetryAttempts = z.RetryAttempts
86+
}
87+
if z.ExitOnFailure != nil {
88+
result.ExitOnFailure = z.ExitOnFailure
89+
}
90+
91+
return &result
92+
}
93+
94+
// Validate the fingerprint block to ensure we do not have any values that
95+
// cannot be handled.
96+
func (f *Fingerprint) Validate() error {
97+
98+
if f == nil {
99+
return nil
100+
}
101+
102+
if f.Name == "" {
103+
return errors.New("fingerprint name cannot be empty")
104+
}
105+
if !slices.Contains(validEnvFingerprinters, f.Name) {
106+
return fmt.Errorf("fingerprint %q does not support configuration", f.Name)
107+
}
108+
if f.RetryInterval < 0 {
109+
return fmt.Errorf("fingerprint %q retry interval cannot be negative", f.Name)
110+
}
111+
if f.RetryAttempts < -1 {
112+
return fmt.Errorf("fingerprint %q retry attempts cannot be less than -1", f.Name)
113+
}
114+
if len(f.ExtraKeysHCL) > 0 {
115+
return fmt.Errorf("fingerprint %q contains unknown configuration options: %s",
116+
f.Name, strings.Join(f.ExtraKeysHCL, ","))
117+
}
118+
119+
return nil
120+
}

0 commit comments

Comments
 (0)