Skip to content

Commit f20f8c8

Browse files
authored
unroll loops when formatting/parsing (#1)
1 parent f3727ab commit f20f8c8

File tree

10 files changed

+522
-74
lines changed

10 files changed

+522
-74
lines changed

.github/workflows/pr.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,10 @@ jobs:
1515
uses: actions/checkout@v4
1616
with:
1717
ref: ${{ github.event.pull_request.head.sha }}
18-
- name: Setup asdf
19-
uses: asdf-vm/actions/install@v3
18+
- name: Setup Mise
19+
uses: jdx/mise-action@v2
2020
- name: Install dependencies
2121
run: go mod download
22-
- name: Add asdf shims to PATH
23-
run: |
24-
echo "${HOME}/.asdf/shims" >> $GITHUB_PATH
2522
- name: Lint
2623
run: go vet ./...
2724
- name: Run tests

.tool-versions

Lines changed: 0 additions & 1 deletion
This file was deleted.

README.md

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,35 +78,42 @@ func main() {
7878

7979
This package is a fork of [github.com/gofrs/uuid](https://github.com/gofrs/uuid) with the following changes:
8080

81-
- 2x improvement to `FromString`, `UnmarshalText`, and `UnmarshalJSON` performance
8281
- Adds base58 encoding.
8382
- Allows people to set a default format (i.e. base58, hash, canonical)
8483
- Scans nil UUIDs from SQL databases as nil UUIDs (00000000-0000-0000-0000-000000000000) instead of `nil`.
8584
- Fixes issue with [TimestampFromV7](https://github.com/gofrs/uuid/issues/128) not being spec compliant.
8685
- Removed v1, v3, v5 UUIDs.
8786
- Removed support for braced and URN string formats.
8887

88+
## Performance optimizations
89+
90+
This library includes additional performance optimizations beyond the original fork:
91+
92+
- **Zero allocations** for all parsing operations
93+
- **Optimized hex encoding/decoding** with lookup tables and unrolled loops
94+
- **Optimized base58 decoding** with stack allocation and loop unrolling (~29% faster)
95+
8996
## Benchmarks
9097

91-
MacBook Air (15-inch, M2, 2023) Apple M2, 24GB RAM, MacOS 14.4.1
98+
MacBook Air (15-inch, M2, 2023) Apple M2, 24GB RAM, macOS 15.3.2
9299

93-
### Format()
100+
### UUID generation
94101
```
95-
Format(FormatCanonical) 44625793 26.54 ns/op 48 B/op 1 allocs/op
96-
Format(FormatHash) 44022964 26.85 ns/op 32 B/op 1 allocs/op
97-
Format(FormatBase58) 5350190 224.0 ns/op 24 B/op 1 allocs/op
102+
BenchmarkNewV4 1516407 790.7 ns/op 16 B/op 1 allocs/op
103+
BenchmarkNewV7 1816982 659.3 ns/op 16 B/op 1 allocs/op
98104
```
99105

100-
### FromString()
101-
```
102-
FromString(FormatCanonical) 70893008 16.88 ns/op 0 B/op 0 allocs/op
103-
FromString(FormatBase58) 16760137 71.77 ns/op 0 B/op 0 allocs/op
106+
### String Operations
104107
```
108+
BenchmarkString/canonical 59380742 20.03 ns/op 48 B/op 1 allocs/op
109+
BenchmarkString/hash 57661926 20.09 ns/op 32 B/op 1 allocs/op
110+
BenchmarkString/base58 5236279 231.6 ns/op 24 B/op 1 allocs/op
105111
106-
### NewVx()
107-
```
108-
NewV4() 2961621 401.6 ns/op 16 B/op 1 allocs/op
109-
NewV7() 3859464 308.7 ns/op 16 B/op 1 allocs/op
112+
BenchmarkFromBytes 504783348 2.380 ns/op 0 B/op 0 allocs/op
113+
114+
BenchmarkFromString/canonical 153610305 7.834 ns/op 0 B/op 0 allocs/op
115+
BenchmarkFromString/hash 158399199 7.480 ns/op 0 B/op 0 allocs/op
116+
BenchmarkFromString/base58 24494169 48.91 ns/op 0 B/op 0 allocs/op
110117
```
111118

112119
## Contributing
@@ -115,4 +122,4 @@ Read the [CONTRIBUTING.md](CONTRIBUTING.md) guide to learn how to contribute to
115122

116123
## License
117124

118-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
125+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

base58/base58.go

Lines changed: 89 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -70,29 +70,87 @@ func UnmarshalString(dst []byte, str string) error {
7070
}
7171

7272
func UnmarshalBytes(dst, src []byte) error {
73-
outi := make([]uint32, 4) // (uuidSize + 3) / 4
74-
75-
for i := 0; i < len(src); i++ {
76-
c := decode[src[i]]
77-
78-
for j := len(outi) - 1; j >= 0; j-- {
79-
t := uint64(outi[j])*58 + c
80-
c = t >> 32
81-
outi[j] = uint32(t & 0xffffffff)
73+
// Use stack allocation for better performance
74+
var outi [4]uint32
75+
76+
// Optimized for the common case of 22-byte base58 UUID
77+
if len(src) == 22 {
78+
// Unrolled loop for base58 decoding
79+
// Process all 22 characters with partially unrolled loop
80+
var c uint64
81+
82+
// Unroll by 2 for better performance
83+
for i := 0; i < 22; i += 2 {
84+
// First character
85+
c = decode[src[i]]
86+
t3 := uint64(outi[3])*58 + c
87+
c = t3 >> 32
88+
outi[3] = uint32(t3)
89+
90+
t2 := uint64(outi[2])*58 + c
91+
c = t2 >> 32
92+
outi[2] = uint32(t2)
93+
94+
t1 := uint64(outi[1])*58 + c
95+
c = t1 >> 32
96+
outi[1] = uint32(t1)
97+
98+
t0 := uint64(outi[0])*58 + c
99+
outi[0] = uint32(t0)
100+
101+
// Second character (if exists)
102+
if i+1 < 22 {
103+
c = decode[src[i+1]]
104+
t3 = uint64(outi[3])*58 + c
105+
c = t3 >> 32
106+
outi[3] = uint32(t3)
107+
108+
t2 = uint64(outi[2])*58 + c
109+
c = t2 >> 32
110+
outi[2] = uint32(t2)
111+
112+
t1 = uint64(outi[1])*58 + c
113+
c = t1 >> 32
114+
outi[1] = uint32(t1)
115+
116+
t0 = uint64(outi[0])*58 + c
117+
outi[0] = uint32(t0)
118+
}
82119
}
83-
}
84-
85-
mask := uint32(24) // (((uuidSize%4) * 8) || 32) - 8
86-
outLen := 0
87-
for j := 0; j < len(outi); j++ {
88-
for mask < 32 {
89-
dst[outLen] = byte(outi[j] >> mask)
90-
mask -= 8
91-
outLen++
120+
} else {
121+
// Fallback for non-standard lengths
122+
for i := 0; i < len(src); i++ {
123+
c := decode[src[i]]
124+
125+
for j := 3; j >= 0; j-- {
126+
t := uint64(outi[j])*58 + c
127+
c = t >> 32
128+
outi[j] = uint32(t)
129+
}
92130
}
93-
mask = 24
94131
}
95132

133+
// Unrolled output conversion
134+
dst[0] = byte(outi[0] >> 24)
135+
dst[1] = byte(outi[0] >> 16)
136+
dst[2] = byte(outi[0] >> 8)
137+
dst[3] = byte(outi[0])
138+
139+
dst[4] = byte(outi[1] >> 24)
140+
dst[5] = byte(outi[1] >> 16)
141+
dst[6] = byte(outi[1] >> 8)
142+
dst[7] = byte(outi[1])
143+
144+
dst[8] = byte(outi[2] >> 24)
145+
dst[9] = byte(outi[2] >> 16)
146+
dst[10] = byte(outi[2] >> 8)
147+
dst[11] = byte(outi[2])
148+
149+
dst[12] = byte(outi[3] >> 24)
150+
dst[13] = byte(outi[3] >> 16)
151+
dst[14] = byte(outi[3] >> 8)
152+
dst[15] = byte(outi[3])
153+
96154
return nil
97155
}
98156

@@ -103,7 +161,6 @@ func Encode(bin []byte) string {
103161
out := [22]byte{}
104162
var outIndex int = maxEncodedSize - 1 // Start filling from the end
105163

106-
// Convert binary to base58.
107164
for i := 0; i < uuidSize; i++ {
108165
carry := uint32(bin[i])
109166

@@ -113,23 +170,29 @@ func Encode(bin []byte) string {
113170
carry /= 58
114171
}
115172

116-
// Move the start index left if we're still processing non-zero bytes.
117173
for carry > 0 {
118174
outIndex--
119175
out[outIndex] = byte(carry % 58)
120176
carry /= 58
121177
}
122178
}
123179

124-
// Convert numerical base58 values to encoded characters.
125180
for i := outIndex; i < maxEncodedSize; i++ {
126181
out[i] = encode[out[i]]
127182
}
128183

129-
encodedResult := string(out[outIndex:])
130-
if outIndex > 0 {
131-
encodedResult = padLeft[outIndex] + encodedResult
184+
if outIndex == 0 {
185+
return string(out[:])
186+
}
187+
188+
totalLen := 22 // Always 22 for padded result
189+
result := make([]byte, totalLen)
190+
191+
// Fill padding with '1' characters
192+
for i := 0; i < outIndex; i++ {
193+
result[i] = '1'
132194
}
133195

134-
return encodedResult
196+
copy(result[outIndex:], out[outIndex:])
197+
return string(result)
135198
}

base58/base58_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,32 @@ func BenchmarkDecode(b *testing.B) {
5858
Decode(testPairs[i].enc)
5959
}
6060
}
61+
62+
var testCases = []string{
63+
"1C9z3nFjeJ44HMBeuqGNxt",
64+
"6ba7b8109dad11d180b400c04f",
65+
"Xk7pWZaRRFkqbVa3ma7F5f",
66+
"11111111111111111111EJ",
67+
"zzzzzzzzzzzzzzzzzzzzzz",
68+
}
69+
70+
func BenchmarkUnmarshalBytesNew(b *testing.B) {
71+
dst := make([]byte, 16)
72+
src := []byte(testCases[0])
73+
74+
b.ResetTimer()
75+
for i := 0; i < b.N; i++ {
76+
_ = UnmarshalBytes(dst, src)
77+
}
78+
}
79+
80+
func BenchmarkUnmarshalBytesNewMultiple(b *testing.B) {
81+
dst := make([]byte, 16)
82+
83+
b.ResetTimer()
84+
for i := 0; i < b.N; i++ {
85+
for _, tc := range testCases {
86+
_ = UnmarshalBytes(dst, []byte(tc))
87+
}
88+
}
89+
}

codec.go

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -92,28 +92,46 @@ func (u *UUID) Parse(s string) error {
9292
return nil
9393

9494
case 32: // hash
95-
for i := 0; i < 32; i += 2 {
96-
v1 := hexLookupTable[s[i]]
97-
v2 := hexLookupTable[s[i+1]]
98-
if v1|v2 == 255 {
99-
return errInvalidFormat
100-
}
101-
u[i/2] = (v1 << 4) | v2
102-
}
95+
// Unrolled hash parsing loop - 16 iterations, 2 chars per byte
96+
v1 := hexLookupTable[s[0]]; v2 := hexLookupTable[s[1]]; if v1|v2 == 255 { return errInvalidFormat }; u[0] = (v1 << 4) | v2
97+
v1 = hexLookupTable[s[2]]; v2 = hexLookupTable[s[3]]; if v1|v2 == 255 { return errInvalidFormat }; u[1] = (v1 << 4) | v2
98+
v1 = hexLookupTable[s[4]]; v2 = hexLookupTable[s[5]]; if v1|v2 == 255 { return errInvalidFormat }; u[2] = (v1 << 4) | v2
99+
v1 = hexLookupTable[s[6]]; v2 = hexLookupTable[s[7]]; if v1|v2 == 255 { return errInvalidFormat }; u[3] = (v1 << 4) | v2
100+
v1 = hexLookupTable[s[8]]; v2 = hexLookupTable[s[9]]; if v1|v2 == 255 { return errInvalidFormat }; u[4] = (v1 << 4) | v2
101+
v1 = hexLookupTable[s[10]]; v2 = hexLookupTable[s[11]]; if v1|v2 == 255 { return errInvalidFormat }; u[5] = (v1 << 4) | v2
102+
v1 = hexLookupTable[s[12]]; v2 = hexLookupTable[s[13]]; if v1|v2 == 255 { return errInvalidFormat }; u[6] = (v1 << 4) | v2
103+
v1 = hexLookupTable[s[14]]; v2 = hexLookupTable[s[15]]; if v1|v2 == 255 { return errInvalidFormat }; u[7] = (v1 << 4) | v2
104+
v1 = hexLookupTable[s[16]]; v2 = hexLookupTable[s[17]]; if v1|v2 == 255 { return errInvalidFormat }; u[8] = (v1 << 4) | v2
105+
v1 = hexLookupTable[s[18]]; v2 = hexLookupTable[s[19]]; if v1|v2 == 255 { return errInvalidFormat }; u[9] = (v1 << 4) | v2
106+
v1 = hexLookupTable[s[20]]; v2 = hexLookupTable[s[21]]; if v1|v2 == 255 { return errInvalidFormat }; u[10] = (v1 << 4) | v2
107+
v1 = hexLookupTable[s[22]]; v2 = hexLookupTable[s[23]]; if v1|v2 == 255 { return errInvalidFormat }; u[11] = (v1 << 4) | v2
108+
v1 = hexLookupTable[s[24]]; v2 = hexLookupTable[s[25]]; if v1|v2 == 255 { return errInvalidFormat }; u[12] = (v1 << 4) | v2
109+
v1 = hexLookupTable[s[26]]; v2 = hexLookupTable[s[27]]; if v1|v2 == 255 { return errInvalidFormat }; u[13] = (v1 << 4) | v2
110+
v1 = hexLookupTable[s[28]]; v2 = hexLookupTable[s[29]]; if v1|v2 == 255 { return errInvalidFormat }; u[14] = (v1 << 4) | v2
111+
v1 = hexLookupTable[s[30]]; v2 = hexLookupTable[s[31]]; if v1|v2 == 255 { return errInvalidFormat }; u[15] = (v1 << 4) | v2
103112
return nil
104113

105114
case 36: // canonical
106115
if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' {
107116
return fmt.Errorf("uuid: incorrect UUID format in string %q", s)
108117
}
109-
for i, x := range canonicalByteRange {
110-
v1 := hexLookupTable[s[x]]
111-
v2 := hexLookupTable[s[x+1]]
112-
if v1|v2 == 255 {
113-
return errInvalidFormat
114-
}
115-
u[i] = (v1 << 4) | v2
116-
}
118+
// Unrolled canonical parsing loop - canonicalByteRange: [0, 2, 4, 6, 9, 11, 14, 16, 19, 21, 24, 26, 28, 30, 32, 34]
119+
v1 := hexLookupTable[s[0]]; v2 := hexLookupTable[s[1]]; if v1|v2 == 255 { return errInvalidFormat }; u[0] = (v1 << 4) | v2
120+
v1 = hexLookupTable[s[2]]; v2 = hexLookupTable[s[3]]; if v1|v2 == 255 { return errInvalidFormat }; u[1] = (v1 << 4) | v2
121+
v1 = hexLookupTable[s[4]]; v2 = hexLookupTable[s[5]]; if v1|v2 == 255 { return errInvalidFormat }; u[2] = (v1 << 4) | v2
122+
v1 = hexLookupTable[s[6]]; v2 = hexLookupTable[s[7]]; if v1|v2 == 255 { return errInvalidFormat }; u[3] = (v1 << 4) | v2
123+
v1 = hexLookupTable[s[9]]; v2 = hexLookupTable[s[10]]; if v1|v2 == 255 { return errInvalidFormat }; u[4] = (v1 << 4) | v2
124+
v1 = hexLookupTable[s[11]]; v2 = hexLookupTable[s[12]]; if v1|v2 == 255 { return errInvalidFormat }; u[5] = (v1 << 4) | v2
125+
v1 = hexLookupTable[s[14]]; v2 = hexLookupTable[s[15]]; if v1|v2 == 255 { return errInvalidFormat }; u[6] = (v1 << 4) | v2
126+
v1 = hexLookupTable[s[16]]; v2 = hexLookupTable[s[17]]; if v1|v2 == 255 { return errInvalidFormat }; u[7] = (v1 << 4) | v2
127+
v1 = hexLookupTable[s[19]]; v2 = hexLookupTable[s[20]]; if v1|v2 == 255 { return errInvalidFormat }; u[8] = (v1 << 4) | v2
128+
v1 = hexLookupTable[s[21]]; v2 = hexLookupTable[s[22]]; if v1|v2 == 255 { return errInvalidFormat }; u[9] = (v1 << 4) | v2
129+
v1 = hexLookupTable[s[24]]; v2 = hexLookupTable[s[25]]; if v1|v2 == 255 { return errInvalidFormat }; u[10] = (v1 << 4) | v2
130+
v1 = hexLookupTable[s[26]]; v2 = hexLookupTable[s[27]]; if v1|v2 == 255 { return errInvalidFormat }; u[11] = (v1 << 4) | v2
131+
v1 = hexLookupTable[s[28]]; v2 = hexLookupTable[s[29]]; if v1|v2 == 255 { return errInvalidFormat }; u[12] = (v1 << 4) | v2
132+
v1 = hexLookupTable[s[30]]; v2 = hexLookupTable[s[31]]; if v1|v2 == 255 { return errInvalidFormat }; u[13] = (v1 << 4) | v2
133+
v1 = hexLookupTable[s[32]]; v2 = hexLookupTable[s[33]]; if v1|v2 == 255 { return errInvalidFormat }; u[14] = (v1 << 4) | v2
134+
v1 = hexLookupTable[s[34]]; v2 = hexLookupTable[s[35]]; if v1|v2 == 255 { return errInvalidFormat }; u[15] = (v1 << 4) | v2
117135
return nil
118136

119137
default:

codec_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,11 @@ func BenchmarkFromString(b *testing.B) {
466466
FromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8")
467467
}
468468
})
469+
b.Run("hash", func(b *testing.B) {
470+
for i := 0; i < b.N; i++ {
471+
FromString("6ba7b8109dad11d180b400c04fd430c8")
472+
}
473+
})
469474
b.Run("base58", func(b *testing.B) {
470475
for i := 0; i < b.N; i++ {
471476
FromString("EJ34kCVxxF9jHMKD4EgrAK")

mise.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[tools]
2+
go = "1.19"

0 commit comments

Comments
 (0)