Skip to content

Commit e15c23e

Browse files
author
John Mason
committed
Use xxhash
1 parent e59be01 commit e15c23e

File tree

3 files changed

+38
-37
lines changed

3 files changed

+38
-37
lines changed

index/indexdata.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ type indexData struct {
109109
}
110110

111111
// docMatchTreeCache is a cache for docMatchTree objects so they don't need to be recomputed
112-
type docMatchTreeCache map[string]*docMatchTree
112+
type docMatchTreeCache map[struct{ field, value string }]*docMatchTree
113113

114114
type symbolData struct {
115115
// symContent stores Symbol.Sym and Symbol.Parent.

index/matchtree.go

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ import (
2323
"strings"
2424
"unicode/utf8"
2525

26-
"crypto/sha256"
27-
26+
"github.com/cespare/xxhash/v2"
2827
"github.com/grafana/regexp"
2928

3029
"github.com/sourcegraph/zoekt"
@@ -1061,7 +1060,9 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error)
10611060
}, nil
10621061

10631062
case *query.Meta:
1064-
cacheKey := queryMetaCacheKey(s.Field, s.Value)
1063+
checksum := queryMetaChecksum(s.Field, s.Value)
1064+
cacheKey := struct{ field, value string }{"Meta", checksum}
1065+
10651066
if cached, ok := d.docMatchTreeCache[cacheKey]; ok {
10661067
return cached, nil
10671068
}
@@ -1215,7 +1216,9 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error)
12151216
}, nil
12161217

12171218
case *query.RepoIDs:
1218-
cacheKey := queryRepoIdsCacheKey(d.repoMetaData)
1219+
checksum := queryRepoIdsChecksum(d.repoMetaData)
1220+
cacheKey := struct{ field, value string }{"RepoIDs", checksum}
1221+
12191222
if cached, ok := d.docMatchTreeCache[cacheKey]; ok {
12201223
return cached, nil
12211224
}
@@ -1458,17 +1461,19 @@ func isRegexpAll(r *syntax.Regexp) bool {
14581461
}
14591462
}
14601463

1461-
func queryMetaCacheKey(field string, value *regexp.Regexp) string {
1462-
sum := sha256.Sum256([]byte(fmt.Sprintf("%s:%s", field, value.String())))
1463-
return fmt.Sprintf("Meta:%x", sum[:])
1464+
func queryMetaChecksum(field string, value *regexp.Regexp) string {
1465+
h := xxhash.New()
1466+
h.Write([]byte(field))
1467+
h.Write([]byte{':'})
1468+
h.Write([]byte(value.String()))
1469+
return fmt.Sprintf("%x", h.Sum64())
14641470
}
14651471

1466-
func queryRepoIdsCacheKey(repos []zoekt.Repository) string {
1467-
var b strings.Builder
1472+
func queryRepoIdsChecksum(repos []zoekt.Repository) string {
1473+
h := xxhash.New()
14681474
for _, r := range repos {
1469-
b.WriteString(fmt.Sprint(r.ID))
1470-
b.WriteByte(',')
1475+
h.Write([]byte(fmt.Sprint(r.ID)))
1476+
h.Write([]byte{','})
14711477
}
1472-
sum := sha256.Sum256([]byte(b.String()))
1473-
return fmt.Sprintf("RepoIDs:%x", sum[:])
1478+
return fmt.Sprintf("%x", h.Sum64())
14741479
}

index/matchtree_test.go

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -379,9 +379,11 @@ func TestRepoIDs(t *testing.T) {
379379
}
380380

381381
// Check that the docMatchTree cache is populated correctly
382-
key := queryRepoIdsCacheKey(d.repoMetaData)
383-
if _, ok := d.docMatchTreeCache[key]; !ok {
384-
t.Errorf("expected docMatchTreeCache to be populated for key %q", key)
382+
checksum := queryRepoIdsChecksum(d.repoMetaData)
383+
cacheKey := struct{ field, value string }{"RepoIDs", checksum}
384+
385+
if _, ok := d.docMatchTreeCache[cacheKey]; !ok {
386+
t.Errorf("expected docMatchTreeCache to be populated for key %q", cacheKey)
385387
}
386388

387389
want := []uint32{2, 4, 5}
@@ -456,9 +458,11 @@ func TestMetaQueryMatchTree(t *testing.T) {
456458
}
457459

458460
// Check that the docMatchTree cache is populated correctly
459-
key := queryMetaCacheKey("license", regexp.MustCompile("M.T"))
460-
if _, ok := d.docMatchTreeCache[key]; !ok {
461-
t.Errorf("expected docMatchTreeCache to be populated for key %q", key)
461+
checksum := queryMetaChecksum("license", regexp.MustCompile("M.T"))
462+
cacheKey := struct{ field, value string }{"Meta", checksum}
463+
464+
if _, ok := d.docMatchTreeCache[cacheKey]; !ok {
465+
t.Errorf("expected docMatchTreeCache to be populated for key %q", cacheKey)
462466
}
463467

464468
var matched []uint32
@@ -483,17 +487,13 @@ func Test_queryMetaCacheKey(t *testing.T) {
483487
pattern string
484488
wantKey string
485489
}{
486-
// Generated via:
487-
// echo -n 'metaField:foo.*bar' | sha256sum
488-
{"metaField", "foo.*bar", "Meta:afc6e783c05767285e8657c92c6af09bd8c72d4c0cabe36614b0b2ba3b697724"},
489-
// echo -n 'metaField:foo.*baz' | sha256sum
490-
{"metaField", "foo.*baz", "Meta:7c5d6616ad2a00042e3ecb1d55cd4ef1907c5b3c232011e45a7f7ba7e8143b63"},
491-
// echo -n 'otherField:foo.*bar' | sha256sum
492-
{"otherField", "foo.*bar", "Meta:5761c1b19ae8b1c34c5933c8ddb4fe696d80918184547ad42e4953b15700f0ef"},
490+
{"metaField", "foo.*bar", "24e88a5ffec04af0"},
491+
{"metaField", "foo.*baz", "d8d6f6a7f0725b61"},
492+
{"otherField", "foo.*bar", "c9d07e17c028364"},
493493
}
494494
for _, tc := range cases {
495495
re := regexp.MustCompile(tc.pattern)
496-
key := queryMetaCacheKey(tc.field, re)
496+
key := queryMetaChecksum(tc.field, re)
497497
if key != tc.wantKey {
498498
t.Errorf("unexpected key for field=%q pattern=%q: got %q, want %q", tc.field, tc.pattern, key, tc.wantKey)
499499
}
@@ -505,21 +505,17 @@ func Test_queryRepoIdsCacheKey(t *testing.T) {
505505
repos []zoekt.Repository
506506
wantKey string
507507
}{
508-
// Generated via:
509-
// echo -n '123,456,' | sha256sum
510-
{[]zoekt.Repository{{ID: 123}, {ID: 456}}, "RepoIDs:a160b50b57496a46824c7e22f8c7047dbbec38752fa1b066d3f50d9f33baaddc"},
511-
// echo -n '456,123,' | sha256sum
512-
{[]zoekt.Repository{{ID: 456}, {ID: 123}}, "RepoIDs:1d899c857ed96d50e2ad5a9f1505a4a988a69375ec142c8bd29b1aaa545facfb"},
513-
// echo -n '123,456,789,' | sha256sum
514-
{[]zoekt.Repository{{ID: 123}, {ID: 456}, {ID: 789}}, "RepoIDs:d2c687720e021d3c3d3b8ae461451e144148d84deca4d45d40523f8501c72c39"},
508+
{[]zoekt.Repository{{ID: 123}, {ID: 456}}, "949bef4eacf1f176"},
509+
{[]zoekt.Repository{{ID: 456}, {ID: 123}}, "410563affd1b00fa"},
510+
{[]zoekt.Repository{{ID: 123}, {ID: 456}, {ID: 789}}, "876a12b235f36aa8"},
515511
}
516512
for _, tc := range cases {
517-
key := queryRepoIdsCacheKey(tc.repos)
513+
key := queryRepoIdsChecksum(tc.repos)
518514
if key != tc.wantKey {
519515
t.Errorf("unexpected key for repos=%v: got %q, want %q", tc.repos, key, tc.wantKey)
520516
}
521517
// Check determinism
522-
key2 := queryRepoIdsCacheKey(tc.repos)
518+
key2 := queryRepoIdsChecksum(tc.repos)
523519
if key != key2 {
524520
t.Errorf("key not deterministic for repos=%v: %q vs %q", tc.repos, key, key2)
525521
}

0 commit comments

Comments
 (0)