Skip to content

Commit 32d409f

Browse files
committed
de-duplicate all key/value pairs
Previously, de-duplication was limited such that keys passed to a log call as parameters overwrote keys passed earlier to WithValues. Using the same key in different WithValues calls or multiple times in a WithValues call or the parameters led to duplicates in the output. Now all key/value pairs get checked for duplicates. The last one wins and is printed at the place where it was added. A potential improvement would be to keep it at the place where it first appeared if the value is the same because then different log entries are more likely to have it at the beginning of their key/value pairs because it was added by a common WithValues call. Because the new implementation avoids memory allocations, the performance impact is minimal. Some cases even become faster. goos: linux goarch: amd64 pkg: k8s.io/klog/examples/benchmarks cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz │ before │ after1 │ │ sec/op │ sec/op vs base │ Output/klog/fail-verbosity-non-standard-struct-key-check/objects/0-36 385.5n ± 9% 358.9n ± 5% -6.91% (p=0.041 n=6) Output/klog/fail-verbosity-non-standard-struct-key-check/objects/10-36 374.3n ± 4% 357.3n ± 5% -4.55% (p=0.015 n=6) Output/klog/fail-verbosity-non-standard-struct-key-check/objects/20-36 388.4n ± 3% 359.2n ± 4% -7.53% (p=0.002 n=6) ... Output/zapr/fail-verbosity-non-standard-int-key-check/objects/90-36 380.9n ± 4% 379.4n ± 2% ~ (p=0.937 n=6) Output/zapr/fail-verbosity-non-standard-int-key-check/objects/100-36 375.1n ± 3% 381.9n ± 4% ~ (p=0.093 n=6) geomean 3.461µ 3.442µ -0.55% pkg: k8s.io/klog/examples/output_test ZaprOutput/regular_error_types_as_value-36 3.270µ ± 10% 3.530µ ± 9% ~ (p=0.132 n=6) ZaprOutput/struct_values-36 3.932µ ± 11% 4.107µ ± 10% ~ (p=1.000 n=6) ZaprOutput/klog.Format-36 4.276µ ± 12% 4.439µ ± 7% ~ (p=0.132 n=6) ZaprOutput/regular_error_types_when_using_logr.Error-36 2.790µ ± 7% 2.834µ ± 15% ~ (p=0.699 n=6) ZaprOutput/map_values-36 5.802µ ± 9% 5.829µ ± 3% ~ (p=1.000 n=6) ZaprOutput/log_with_name_and_values-36 4.051µ ± 4% 3.864µ ± 6% -4.63% (p=0.015 n=6) ... geomean 5.581µ 5.646µ +1.16% pkg: k8s.io/klog/v2 Header-36 1.339µ ± 5% 1.311µ ± 6% ~ (p=0.513 n=6) ... KlogOutput/klog.Format-36 4.207µ ± 10% 4.667µ ± 3% +10.95% (p=0.009 n=6) KlogOutput/cyclic_list-36 151.3µ ± 1% 151.1µ ± 1% ~ (p=0.818 n=6) KlogOutput/override_single_value-36 2.308µ ± 9% 3.501µ ± 15% +51.66% (p=0.002 n=6) KlogOutput/multiple_WithValues-36 10.91µ ± 9% 12.46µ ± 7% +14.16% (p=0.004 n=6) ... KlogOutput/duplicates-36 21.34µ ± 2% 22.52µ ± 7% ~ (p=0.065 n=6) ... geomean 252.6n 260.6n +3.18% pkg: k8s.io/klog/v2/textlogger TextloggerOutput/html_characters-36 1.982µ ± 12% 1.978µ ± 6% ~ (p=0.937 n=6) TextloggerOutput/map_values-36 3.215µ ± 6% 3.429µ ± 4% +6.67% (p=0.041 n=6) ... TextloggerOutput/mixed_duplicates-36 19.33µ ± 5% 19.59µ ± 7% ~ (p=0.818 n=6) ... TextloggerOutput/print_duplicate_keys_in_arguments-36 2.067µ ± 7% 2.388µ ± 6% +15.53% (p=0.002 n=6) ... TextloggerOutput/duplicates-36 16.80µ ± 11% 17.06µ ± 4% ~ (p=0.310 n=6) ... geomean 2.798µ 2.860µ +2.24%
1 parent 2bcc94a commit 32d409f

File tree

9 files changed

+175
-152
lines changed

9 files changed

+175
-152
lines changed

internal/serialize/keyvalues.go

Lines changed: 115 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -51,51 +51,6 @@ func WithValues(oldKV, newKV []interface{}) []interface{} {
5151
return kv
5252
}
5353

54-
// MergeKVs deduplicates elements provided in two key/value slices.
55-
//
56-
// Keys in each slice are expected to be unique, so duplicates can only occur
57-
// when the first and second slice contain the same key. When that happens, the
58-
// key/value pair from the second slice is used. The first slice must be well-formed
59-
// (= even key/value pairs). The second one may have a missing value, in which
60-
// case the special "missing value" is added to the result.
61-
func MergeKVs(first, second []interface{}) []interface{} {
62-
maxLength := len(first) + (len(second)+1)/2*2
63-
if maxLength == 0 {
64-
// Nothing to do at all.
65-
return nil
66-
}
67-
68-
if len(first) == 0 && len(second)%2 == 0 {
69-
// Nothing to be overridden, second slice is well-formed
70-
// and can be used directly.
71-
return second
72-
}
73-
74-
// Determine which keys are in the second slice so that we can skip
75-
// them when iterating over the first one. The code intentionally
76-
// favors performance over completeness: we assume that keys are string
77-
// constants and thus compare equal when the string values are equal. A
78-
// string constant being overridden by, for example, a fmt.Stringer is
79-
// not handled.
80-
overrides := map[interface{}]bool{}
81-
for i := 0; i < len(second); i += 2 {
82-
overrides[second[i]] = true
83-
}
84-
merged := make([]interface{}, 0, maxLength)
85-
for i := 0; i+1 < len(first); i += 2 {
86-
key := first[i]
87-
if overrides[key] {
88-
continue
89-
}
90-
merged = append(merged, key, first[i+1])
91-
}
92-
merged = append(merged, second...)
93-
if len(merged)%2 != 0 {
94-
merged = append(merged, missingValue)
95-
}
96-
return merged
97-
}
98-
9954
type Formatter struct {
10055
AnyToStringHook AnyToStringFunc
10156
}
@@ -105,46 +60,7 @@ type AnyToStringFunc func(v interface{}) string
10560
// MergeKVsInto is a variant of MergeKVs which directly formats the key/value
10661
// pairs into a buffer.
10762
func (f Formatter) MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) {
108-
if len(first) == 0 && len(second) == 0 {
109-
// Nothing to do at all.
110-
return
111-
}
112-
113-
if len(first) == 0 && len(second)%2 == 0 {
114-
// Nothing to be overridden, second slice is well-formed
115-
// and can be used directly.
116-
for i := 0; i < len(second); i += 2 {
117-
f.KVFormat(b, second[i], second[i+1])
118-
}
119-
return
120-
}
121-
122-
// Determine which keys are in the second slice so that we can skip
123-
// them when iterating over the first one. The code intentionally
124-
// favors performance over completeness: we assume that keys are string
125-
// constants and thus compare equal when the string values are equal. A
126-
// string constant being overridden by, for example, a fmt.Stringer is
127-
// not handled.
128-
overrides := map[interface{}]bool{}
129-
for i := 0; i < len(second); i += 2 {
130-
overrides[second[i]] = true
131-
}
132-
for i := 0; i < len(first); i += 2 {
133-
key := first[i]
134-
if overrides[key] {
135-
continue
136-
}
137-
f.KVFormat(b, key, first[i+1])
138-
}
139-
// Round down.
140-
l := len(second)
141-
l = l / 2 * 2
142-
for i := 1; i < l; i += 2 {
143-
f.KVFormat(b, second[i-1], second[i])
144-
}
145-
if len(second)%2 == 1 {
146-
f.KVFormat(b, second[len(second)-1], missingValue)
147-
}
63+
f.formatKVs(b, first, second)
14864
}
14965

15066
func MergeAndFormatKVs(b *bytes.Buffer, first, second []interface{}) {
@@ -156,16 +72,7 @@ const missingValue = "(MISSING)"
15672
// KVListFormat serializes all key/value pairs into the provided buffer.
15773
// A space gets inserted before the first pair and between each pair.
15874
func (f Formatter) KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) {
159-
for i := 0; i < len(keysAndValues); i += 2 {
160-
var v interface{}
161-
k := keysAndValues[i]
162-
if i+1 < len(keysAndValues) {
163-
v = keysAndValues[i+1]
164-
} else {
165-
v = missingValue
166-
}
167-
f.KVFormat(b, k, v)
168-
}
75+
f.formatKVs(b, keysAndValues)
16976
}
17077

17178
func KVListFormat(b *bytes.Buffer, keysAndValues ...interface{}) {
@@ -176,6 +83,119 @@ func KVFormat(b *bytes.Buffer, k, v interface{}) {
17683
Formatter{}.KVFormat(b, k, v)
17784
}
17885

86+
// formatKVs formats all key/value pairs such that the output contains no
87+
// duplicates ("last one wins").
88+
func (f Formatter) formatKVs(b *bytes.Buffer, kvs ...[]interface{}) {
89+
// De-duplication is done by optimistically formatting all key value
90+
// pairs and then cutting out the output of those key/value pairs which
91+
// got overwritten later.
92+
//
93+
// In the common case of no duplicates, the only overhead is tracking
94+
// previous keys. This uses a slice with a simple linear search because
95+
// the number of entries is typically so low that allocating a map or
96+
// keeping a sorted slice with binary search aren't justified.
97+
//
98+
// Using a fixed size here makes the Go compiler use the stack as
99+
// initial backing store for the slice, which is crucial for
100+
// performance.
101+
existing := make([]obsoleteKV, 0, 32)
102+
obsolete := make([]interval, 0, 32) // Sorted by start index.
103+
for _, keysAndValues := range kvs {
104+
for i := 0; i < len(keysAndValues); i += 2 {
105+
var v interface{}
106+
k := keysAndValues[i]
107+
if i+1 < len(keysAndValues) {
108+
v = keysAndValues[i+1]
109+
} else {
110+
v = missingValue
111+
}
112+
var e obsoleteKV
113+
e.start = b.Len()
114+
e.key = f.KVFormat(b, k, v)
115+
e.end = b.Len()
116+
i := findObsoleteEntry(existing, e.key)
117+
if i >= 0 {
118+
// The old entry gets obsoleted. This ensures
119+
// that if the more recent one has a different value,
120+
// that value remains. If the value is the same,
121+
// then we could also keep the old entry. That
122+
// would lead to a more natural order of key/value
123+
// pairs in the output (shared key/value in multiple
124+
// log entries always at the beginning) but at the
125+
// cost of another memory comparison.
126+
obsolete = append(obsolete, existing[i].interval)
127+
existing[i].interval = e.interval
128+
} else {
129+
// Instead of appending at the end and doing a
130+
// linear search in findEntry, we could keep
131+
// the slice sorted by key and do a binary search.
132+
//
133+
// Above:
134+
// i, ok := slices.BinarySearchFunc(existing, e, func(a, b entry) int { return strings.Compare(a.key, b.key) })
135+
// Here:
136+
// existing = slices.Insert(existing, i, e)
137+
//
138+
// But that adds a dependency on the slices package
139+
// and made performance slightly worse, presumably
140+
// because the cost of shifting entries around
141+
// did not pay of with faster lookups.
142+
existing = append(existing, e)
143+
}
144+
}
145+
}
146+
147+
// If we need to remove some obsolete key/value pairs then move the memory.
148+
if len(obsolete) > 0 {
149+
// Potentially the next remaining output (might itself be obsolete).
150+
from := obsolete[0].end
151+
// Next obsolete entry.
152+
nextObsolete := 1
153+
// This is the source buffer, before truncation.
154+
all := b.Bytes()
155+
b.Truncate(obsolete[0].start)
156+
157+
for nextObsolete < len(obsolete) {
158+
if from == obsolete[nextObsolete].start {
159+
// Skip also the next obsolete key/value.
160+
from = obsolete[nextObsolete].end
161+
nextObsolete++
162+
continue
163+
}
164+
165+
// Preserve some output. Write uses copy, which
166+
// explicitly allows source and destination to overlap.
167+
// That could happen here.
168+
valid := all[from:obsolete[nextObsolete].start]
169+
b.Write(valid)
170+
from = obsolete[nextObsolete].end
171+
nextObsolete++
172+
}
173+
// Copy end of buffer.
174+
valid := all[from:]
175+
b.Write(valid)
176+
}
177+
}
178+
179+
type obsoleteKV struct {
180+
key string
181+
interval
182+
}
183+
184+
// interval includes the start and excludes the end.
185+
type interval struct {
186+
start int
187+
end int
188+
}
189+
190+
func findObsoleteEntry(entries []obsoleteKV, key string) int {
191+
for i, entry := range entries {
192+
if entry.key == key {
193+
return i
194+
}
195+
}
196+
return -1
197+
}
198+
179199
// formatAny is the fallback formatter for a value. It supports a hook (for
180200
// example, for YAML encoding) and itself uses JSON encoding.
181201
func (f Formatter) formatAny(b *bytes.Buffer, v interface{}) {

internal/serialize/keyvalues_no_slog.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import (
2828

2929
// KVFormat serializes one key/value pair into the provided buffer.
3030
// A space gets inserted before the pair.
31-
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
31+
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) string {
3232
// This is the version without slog support. Must be kept in sync with
3333
// the version in keyvalues_slog.go.
3434

@@ -37,13 +37,15 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
3737
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
3838
// for the sake of performance. Keys with spaces,
3939
// special characters, etc. will break parsing.
40+
var key string
4041
if sK, ok := k.(string); ok {
4142
// Avoid one allocation when the key is a string, which
4243
// normally it should be.
43-
b.WriteString(sK)
44+
key = sK
4445
} else {
45-
b.WriteString(fmt.Sprintf("%s", k))
46+
key = fmt.Sprintf("%s", k)
4647
}
48+
b.WriteString(key)
4749

4850
// The type checks are sorted so that more frequently used ones
4951
// come first because that is then faster in the common
@@ -94,4 +96,6 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
9496
default:
9597
f.formatAny(b, v)
9698
}
99+
100+
return key
97101
}

internal/serialize/keyvalues_slog.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ import (
2929
)
3030

3131
// KVFormat serializes one key/value pair into the provided buffer.
32-
// A space gets inserted before the pair.
33-
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
32+
// A space gets inserted before the pair. It returns the key.
33+
func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) string {
3434
// This is the version without slog support. Must be kept in sync with
3535
// the version in keyvalues_slog.go.
3636

@@ -39,13 +39,15 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
3939
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments
4040
// for the sake of performance. Keys with spaces,
4141
// special characters, etc. will break parsing.
42+
var key string
4243
if sK, ok := k.(string); ok {
4344
// Avoid one allocation when the key is a string, which
4445
// normally it should be.
45-
b.WriteString(sK)
46+
key = sK
4647
} else {
47-
b.WriteString(fmt.Sprintf("%s", k))
48+
key = fmt.Sprintf("%s", k)
4849
}
50+
b.WriteString(key)
4951

5052
// The type checks are sorted so that more frequently used ones
5153
// come first because that is then faster in the common
@@ -112,6 +114,8 @@ func (f Formatter) KVFormat(b *bytes.Buffer, k, v interface{}) {
112114
default:
113115
f.formatAny(b, v)
114116
}
117+
118+
return key
115119
}
116120

117121
// generateJSON has the same preference for plain strings as KVFormat.

klogr.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func (l *klogger) Init(info logr.RuntimeInfo) {
5353
}
5454

5555
func (l *klogger) Info(level int, msg string, kvList ...interface{}) {
56-
merged := serialize.MergeKVs(l.values, kvList)
56+
merged := serialize.WithValues(l.values, kvList)
5757
// Skip this function.
5858
VDepth(l.callDepth+1, Level(level)).InfoSDepth(l.callDepth+1, msg, merged...)
5959
}
@@ -63,7 +63,7 @@ func (l *klogger) Enabled(level int) bool {
6363
}
6464

6565
func (l *klogger) Error(err error, msg string, kvList ...interface{}) {
66-
merged := serialize.MergeKVs(l.values, kvList)
66+
merged := serialize.WithValues(l.values, kvList)
6767
ErrorSDepth(l.callDepth+1, err, msg, merged...)
6868
}
6969

klogr/klogr.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,11 @@ func (l *klogger) Info(level int, msg string, kvList ...interface{}) {
138138
switch l.format {
139139
case FormatSerialize:
140140
msgStr := flatten("msg", msg)
141-
merged := serialize.MergeKVs(l.values, kvList)
141+
merged := serialize.WithValues(l.values, kvList)
142142
kvStr := flatten(merged...)
143143
klog.VDepth(l.callDepth+1, klog.Level(level)).InfoDepth(l.callDepth+1, msgStr, " ", kvStr)
144144
case FormatKlog:
145-
merged := serialize.MergeKVs(l.values, kvList)
145+
merged := serialize.WithValues(l.values, kvList)
146146
klog.VDepth(l.callDepth+1, klog.Level(level)).InfoSDepth(l.callDepth+1, msg, merged...)
147147
}
148148
}
@@ -160,11 +160,11 @@ func (l *klogger) Error(err error, msg string, kvList ...interface{}) {
160160
switch l.format {
161161
case FormatSerialize:
162162
errStr := flatten("error", loggableErr)
163-
merged := serialize.MergeKVs(l.values, kvList)
163+
merged := serialize.WithValues(l.values, kvList)
164164
kvStr := flatten(merged...)
165165
klog.ErrorDepth(l.callDepth+1, msgStr, " ", errStr, " ", kvStr)
166166
case FormatKlog:
167-
merged := serialize.MergeKVs(l.values, kvList)
167+
merged := serialize.WithValues(l.values, kvList)
168168
klog.ErrorSDepth(l.callDepth+1, err, msg, merged...)
169169
}
170170
}

klogr/klogr_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,22 +69,22 @@ func testOutput(t *testing.T, format string) {
6969
expectedKlogOutput: `"test" logger="hello.world" akey="avalue"
7070
`,
7171
},
72-
"may print duplicate keys with the same value": {
72+
"de-duplicate keys with the same value": {
7373
klogr: createLogger().V(0),
7474
text: "test",
7575
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue"},
7676
expectedOutput: `"msg"="test" "akey"="avalue"
7777
`,
78-
expectedKlogOutput: `"test" akey="avalue" akey="avalue"
78+
expectedKlogOutput: `"test" akey="avalue"
7979
`,
8080
},
81-
"may print duplicate keys when the values are passed to Info": {
81+
"de-duplicate keys when the values are passed to Info": {
8282
klogr: createLogger().V(0),
8383
text: "test",
8484
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue2"},
8585
expectedOutput: `"msg"="test" "akey"="avalue2"
8686
`,
87-
expectedKlogOutput: `"test" akey="avalue" akey="avalue2"
87+
expectedKlogOutput: `"test" akey="avalue2"
8888
`,
8989
},
9090
"should only print the duplicate key that is passed to Info if one was passed to the logger": {

ktesting/testinglogger_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ func TestInfo(t *testing.T) {
5757
"should not print duplicate keys with the same value": {
5858
text: "test",
5959
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue"},
60-
expectedOutput: `Ixxx test akey="avalue" akey="avalue"
60+
expectedOutput: `Ixxx test akey="avalue"
6161
`,
6262
},
63-
"should only print the last duplicate key when the values are passed to Info": {
63+
"should print no duplicate keys when the values are passed to Info": {
6464
text: "test",
6565
keysAndValues: []interface{}{"akey", "avalue", "akey", "avalue2"},
66-
expectedOutput: `Ixxx test akey="avalue" akey="avalue2"
66+
expectedOutput: `Ixxx test akey="avalue2"
6767
`,
6868
},
6969
"should only print the duplicate key that is passed to Info if one was passed to the logger": {

0 commit comments

Comments
 (0)