flexstack · jaredLunde · Jul 24, 2025 · Jul 24, 2025 · Jul 24, 2025 · Jul 24, 2025
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -15,13 +15,10 @@ jobs:
         uses: actions/checkout@v4
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-      - name: Setup asdf
-        uses: asdf-vm/actions/install@v3
+      - name: Setup Mise
+        uses: jdx/mise-action@v2
       - name: Install dependencies
         run: go mod download
-      - name: Add asdf shims to PATH
-        run: |
-          echo "${HOME}/.asdf/shims" >> $GITHUB_PATH
       - name: Lint
         run: go vet ./...
       - name: Run tests

diff --git a/.tool-versions b/.tool-versions
diff --git a/README.md b/README.md
@@ -78,35 +78,42 @@ func main() {
 
 This package is a fork of [github.com/gofrs/uuid](https://github.com/gofrs/uuid) with the following changes:
 
-- 2x improvement to `FromString`, `UnmarshalText`, and `UnmarshalJSON` performance
 - Adds base58 encoding.
 - Allows people to set a default format (i.e. base58, hash, canonical)
 - Scans nil UUIDs from SQL databases as nil UUIDs (00000000-0000-0000-0000-000000000000) instead of `nil`.
 - Fixes issue with [TimestampFromV7](https://github.com/gofrs/uuid/issues/128) not being spec compliant.
 - Removed v1, v3, v5 UUIDs.
 - Removed support for braced and URN string formats.
 
+## Performance optimizations
+
+This library includes additional performance optimizations beyond the original fork:
+
+- **Zero allocations** for all parsing operations
+- **Optimized hex encoding/decoding** with lookup tables and unrolled loops
+- **Optimized base58 decoding** with stack allocation and loop unrolling (~29% faster)
+
 ## Benchmarks
 
-MacBook Air (15-inch, M2, 2023) Apple M2, 24GB RAM, MacOS 14.4.1
+MacBook Air (15-inch, M2, 2023) Apple M2, 24GB RAM, macOS 15.3.2
 
-### Format()
+### UUID generation
 ```
-Format(FormatCanonical)        44625793         26.54 ns/op           48 B/op          1 allocs/op
-Format(FormatHash)             44022964         26.85 ns/op           32 B/op          1 allocs/op
-Format(FormatBase58)           5350190          224.0 ns/op           24 B/op          1 allocs/op
+BenchmarkNewV4         1516407	       790.7 ns/op	      16 B/op	       1 allocs/op
+BenchmarkNewV7         1816982	       659.3 ns/op	      16 B/op	       1 allocs/op
 ```
 
-### FromString()
-```
-FromString(FormatCanonical)    70893008         16.88 ns/op           0 B/op           0 allocs/op
-FromString(FormatBase58)       16760137         71.77 ns/op           0 B/op           0 allocs/op
+### String operations
 ```
+BenchmarkString/canonical     	59380742	        20.03 ns/op	      48 B/op	       1 allocs/op
+BenchmarkString/hash          	57661926	        20.09 ns/op	      32 B/op	       1 allocs/op
+BenchmarkString/base58        	 5236279	        231.6 ns/op	      24 B/op	       1 allocs/op
 
-### NewVx()
-```
-NewV4()                        2961621          401.6 ns/op           16 B/op          1 allocs/op
-NewV7()                        3859464          308.7 ns/op           16 B/op          1 allocs/op
+BenchmarkFromBytes             504783348	        2.380 ns/op	       0 B/op	       0 allocs/op
+
+BenchmarkFromString/canonical  153610305	         7.834 ns/op	     0 B/op	       0 allocs/op
+BenchmarkFromString/hash       158399199	         7.480 ns/op	     0 B/op	       0 allocs/op
+BenchmarkFromString/base58      24494169	        48.91 ns/op	     0 B/op	       0 allocs/op
 ```
 
 ## Contributing
@@ -115,4 +122,4 @@ Read the [CONTRIBUTING.md](CONTRIBUTING.md) guide to learn how to contribute to
 
 ## License
 
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
diff --git a/base58/base58.go b/base58/base58.go
@@ -70,29 +70,87 @@ func UnmarshalString(dst []byte, str string) error {
 }
 
 func UnmarshalBytes(dst, src []byte) error {
-	outi := make([]uint32, 4) // (uuidSize + 3) / 4
-
-	for i := 0; i < len(src); i++ {
-		c := decode[src[i]]
-
-		for j := len(outi) - 1; j >= 0; j-- {
-			t := uint64(outi[j])*58 + c
-			c = t >> 32
-			outi[j] = uint32(t & 0xffffffff)
+	// Use stack allocation for better performance
+	var outi [4]uint32
+
+	// Optimized for the common case of 22-byte base58 UUID
+	if len(src) == 22 {
+		// Each character multiplies the 128-bit value held in outi by 58 and adds
+		// its decode-table value; outi[3] is the least significant word, outi[0] the most.
+		var c uint64
+
+		// Unroll by 2 for better performance
+		for i := 0; i < 22; i += 2 {
+			// First character
+			c = decode[src[i]]
+			t3 := uint64(outi[3])*58 + c
+			c = t3 >> 32
+			outi[3] = uint32(t3)
+
+			t2 := uint64(outi[2])*58 + c
+			c = t2 >> 32
+			outi[2] = uint32(t2)
+
+			t1 := uint64(outi[1])*58 + c
+			c = t1 >> 32
+			outi[1] = uint32(t1)
+
+			t0 := uint64(outi[0])*58 + c
+			outi[0] = uint32(t0)
+
+			// Second character of the pair; i+1 is at most 21 here, so the guard below always passes
+			if i+1 < 22 {
+				c = decode[src[i+1]]
+				t3 = uint64(outi[3])*58 + c
+				c = t3 >> 32
+				outi[3] = uint32(t3)
+
+				t2 = uint64(outi[2])*58 + c
+				c = t2 >> 32
+				outi[2] = uint32(t2)
+
+				t1 = uint64(outi[1])*58 + c
+				c = t1 >> 32
+				outi[1] = uint32(t1)
+
+				t0 = uint64(outi[0])*58 + c
+				outi[0] = uint32(t0)
+			}
 		}
-	}
-
-	mask := uint32(24) // (((uuidSize%4) * 8) || 32) - 8
-	outLen := 0
-	for j := 0; j < len(outi); j++ {
-		for mask < 32 {
-			dst[outLen] = byte(outi[j] >> mask)
-			mask -= 8
-			outLen++
+	} else {
+		// Fallback for non-standard lengths
+		for i := 0; i < len(src); i++ {
+			c := decode[src[i]]
+
+			for j := 3; j >= 0; j-- {
+				t := uint64(outi[j])*58 + c
+				c = t >> 32
+				outi[j] = uint32(t)
+			}
 		}
-		mask = 24
 	}
 
+	// Unrolled output conversion
+	dst[0] = byte(outi[0] >> 24)
+	dst[1] = byte(outi[0] >> 16)
+	dst[2] = byte(outi[0] >> 8)
+	dst[3] = byte(outi[0])
+
+	dst[4] = byte(outi[1] >> 24)
+	dst[5] = byte(outi[1] >> 16)
+	dst[6] = byte(outi[1] >> 8)
+	dst[7] = byte(outi[1])
+
+	dst[8] = byte(outi[2] >> 24)
+	dst[9] = byte(outi[2] >> 16)
+	dst[10] = byte(outi[2] >> 8)
+	dst[11] = byte(outi[2])
+
+	dst[12] = byte(outi[3] >> 24)
+	dst[13] = byte(outi[3] >> 16)
+	dst[14] = byte(outi[3] >> 8)
+	dst[15] = byte(outi[3])
+
 	return nil
 }
 
@@ -103,7 +161,6 @@ func Encode(bin []byte) string {
 	out := [22]byte{}
 	var outIndex int = maxEncodedSize - 1 // Start filling from the end
 
-	// Convert binary to base58.
 	for i := 0; i < uuidSize; i++ {
 		carry := uint32(bin[i])
 
@@ -113,23 +170,29 @@ func Encode(bin []byte) string {
 			carry /= 58
 		}
 
-		// Move the start index left if we're still processing non-zero bytes.
 		for carry > 0 {
 			outIndex--
 			out[outIndex] = byte(carry % 58)
 			carry /= 58
 		}
 	}
 
-	// Convert numerical base58 values to encoded characters.
 	for i := outIndex; i < maxEncodedSize; i++ {
 		out[i] = encode[out[i]]
 	}
 
-	encodedResult := string(out[outIndex:])
-	if outIndex > 0 {
-		encodedResult = padLeft[outIndex] + encodedResult
+	if outIndex == 0 {
+		return string(out[:])
+	}
+
+	totalLen := 22 // Always 22 for padded result
+	result := make([]byte, totalLen)
+
+	// Fill padding with '1' characters
+	for i := 0; i < outIndex; i++ {
+		result[i] = '1'
 	}
 
-	return encodedResult
+	copy(result[outIndex:], out[outIndex:])
+	return string(result)
 }
diff --git a/base58/base58_test.go b/base58/base58_test.go
@@ -58,3 +58,32 @@ func BenchmarkDecode(b *testing.B) {
 		Decode(testPairs[i].enc)
 	}
 }
+
+var testCases = []string{
+	"1C9z3nFjeJ44HMBeuqGNxt",
+	"6ba7b8109dad11d180b400c04f",
+	"Xk7pWZaRRFkqbVa3ma7F5f",
+	"11111111111111111111EJ",
+	"zzzzzzzzzzzzzzzzzzzzzz",
+}
+
+func BenchmarkUnmarshalBytesNew(b *testing.B) {
+	dst := make([]byte, 16)
+	src := []byte(testCases[0])
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = UnmarshalBytes(dst, src)
+	}
+}
+
+func BenchmarkUnmarshalBytesNewMultiple(b *testing.B) {
+	dst := make([]byte, 16)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		for _, tc := range testCases {
+			_ = UnmarshalBytes(dst, []byte(tc))
+		}
+	}
+}
diff --git a/codec.go b/codec.go
@@ -92,28 +92,46 @@ func (u *UUID) Parse(s string) error {
 		return nil
 
 	case 32: // hash
-		for i := 0; i < 32; i += 2 {
-			v1 := hexLookupTable[s[i]]
-			v2 := hexLookupTable[s[i+1]]
-			if v1|v2 == 255 {
-				return errInvalidFormat
-			}
-			u[i/2] = (v1 << 4) | v2
-		}
+		// Unrolled hash parsing loop - 16 iterations, 2 chars per byte
+		v1 := hexLookupTable[s[0]]; v2 := hexLookupTable[s[1]]; if v1|v2 == 255 { return errInvalidFormat }; u[0] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[2]]; v2 = hexLookupTable[s[3]]; if v1|v2 == 255 { return errInvalidFormat }; u[1] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[4]]; v2 = hexLookupTable[s[5]]; if v1|v2 == 255 { return errInvalidFormat }; u[2] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[6]]; v2 = hexLookupTable[s[7]]; if v1|v2 == 255 { return errInvalidFormat }; u[3] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[8]]; v2 = hexLookupTable[s[9]]; if v1|v2 == 255 { return errInvalidFormat }; u[4] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[10]]; v2 = hexLookupTable[s[11]]; if v1|v2 == 255 { return errInvalidFormat }; u[5] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[12]]; v2 = hexLookupTable[s[13]]; if v1|v2 == 255 { return errInvalidFormat }; u[6] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[14]]; v2 = hexLookupTable[s[15]]; if v1|v2 == 255 { return errInvalidFormat }; u[7] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[16]]; v2 = hexLookupTable[s[17]]; if v1|v2 == 255 { return errInvalidFormat }; u[8] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[18]]; v2 = hexLookupTable[s[19]]; if v1|v2 == 255 { return errInvalidFormat }; u[9] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[20]]; v2 = hexLookupTable[s[21]]; if v1|v2 == 255 { return errInvalidFormat }; u[10] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[22]]; v2 = hexLookupTable[s[23]]; if v1|v2 == 255 { return errInvalidFormat }; u[11] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[24]]; v2 = hexLookupTable[s[25]]; if v1|v2 == 255 { return errInvalidFormat }; u[12] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[26]]; v2 = hexLookupTable[s[27]]; if v1|v2 == 255 { return errInvalidFormat }; u[13] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[28]]; v2 = hexLookupTable[s[29]]; if v1|v2 == 255 { return errInvalidFormat }; u[14] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[30]]; v2 = hexLookupTable[s[31]]; if v1|v2 == 255 { return errInvalidFormat }; u[15] = (v1 << 4) | v2
 		return nil
 
 	case 36: // canonical
 		if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' {
 			return fmt.Errorf("uuid: incorrect UUID format in string %q", s)
 		}
-		for i, x := range canonicalByteRange {
-			v1 := hexLookupTable[s[x]]
-			v2 := hexLookupTable[s[x+1]]
-			if v1|v2 == 255 {
-				return errInvalidFormat
-			}
-			u[i] = (v1 << 4) | v2
-		}
+		// Unrolled canonical parsing loop - canonicalByteRange: [0, 2, 4, 6, 9, 11, 14, 16, 19, 21, 24, 26, 28, 30, 32, 34]
+		v1 := hexLookupTable[s[0]]; v2 := hexLookupTable[s[1]]; if v1|v2 == 255 { return errInvalidFormat }; u[0] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[2]]; v2 = hexLookupTable[s[3]]; if v1|v2 == 255 { return errInvalidFormat }; u[1] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[4]]; v2 = hexLookupTable[s[5]]; if v1|v2 == 255 { return errInvalidFormat }; u[2] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[6]]; v2 = hexLookupTable[s[7]]; if v1|v2 == 255 { return errInvalidFormat }; u[3] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[9]]; v2 = hexLookupTable[s[10]]; if v1|v2 == 255 { return errInvalidFormat }; u[4] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[11]]; v2 = hexLookupTable[s[12]]; if v1|v2 == 255 { return errInvalidFormat }; u[5] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[14]]; v2 = hexLookupTable[s[15]]; if v1|v2 == 255 { return errInvalidFormat }; u[6] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[16]]; v2 = hexLookupTable[s[17]]; if v1|v2 == 255 { return errInvalidFormat }; u[7] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[19]]; v2 = hexLookupTable[s[20]]; if v1|v2 == 255 { return errInvalidFormat }; u[8] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[21]]; v2 = hexLookupTable[s[22]]; if v1|v2 == 255 { return errInvalidFormat }; u[9] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[24]]; v2 = hexLookupTable[s[25]]; if v1|v2 == 255 { return errInvalidFormat }; u[10] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[26]]; v2 = hexLookupTable[s[27]]; if v1|v2 == 255 { return errInvalidFormat }; u[11] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[28]]; v2 = hexLookupTable[s[29]]; if v1|v2 == 255 { return errInvalidFormat }; u[12] = (v1 << 4) | v2 
+		v1 = hexLookupTable[s[30]]; v2 = hexLookupTable[s[31]]; if v1|v2 == 255 { return errInvalidFormat }; u[13] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[32]]; v2 = hexLookupTable[s[33]]; if v1|v2 == 255 { return errInvalidFormat }; u[14] = (v1 << 4) | v2
+		v1 = hexLookupTable[s[34]]; v2 = hexLookupTable[s[35]]; if v1|v2 == 255 { return errInvalidFormat }; u[15] = (v1 << 4) | v2
 		return nil
 
 	default:

diff --git a/codec_test.go b/codec_test.go
@@ -466,6 +466,11 @@ func BenchmarkFromString(b *testing.B) {
 			FromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8")
 		}
 	})
+	b.Run("hash", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			FromString("6ba7b8109dad11d180b400c04fd430c8")
+		}
+	})
 	b.Run("base58", func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			FromString("EJ34kCVxxF9jHMKD4EgrAK")

diff --git a/mise.toml b/mise.toml
@@ -0,0 +1,2 @@
+[tools]
+go = "1.19"