Skip to content

Commit 4fbe1b5

Browse files
author
John Mason
committed
Add shard filtering for meta queries
1 parent 90faf6d commit 4fbe1b5

File tree

2 files changed

+193
-1
lines changed

2 files changed

+193
-1
lines changed

search/shards.go

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -436,6 +436,22 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que
436436
}
437437
return false
438438
})
439+
case *query.Meta:
440+
// Meta queries filter repositories based on metadata fields.
441+
// By checking this at the shard level, we can skip entire shards
442+
// that don't contain any matching repositories, avoiding expensive
443+
// I/O operations.
444+
setSize = 0 // Unknown size, we'll filter based on metadata
445+
hasRepos = hasReposForPredicate(func(repo *zoekt.Repository) bool {
446+
if repo.Metadata == nil {
447+
return false
448+
}
449+
v, ok := repo.Metadata[setQuery.Field]
450+
if !ok {
451+
return false
452+
}
453+
return setQuery.Value.MatchString(v)
454+
})
439455
default:
440456
continue
441457
}
@@ -486,7 +502,7 @@ func doSelectRepoSet(shards []*rankedShard, and *query.And) ([]*rankedShard, que
486502
// shard indexData.simplify will simplify to (and true (content baz)) ->
487503
// (content baz). This work can be done now once, rather than per shard.
488504
switch c := c.(type) {
489-
case *query.RepoSet, *query.RepoIDs, *query.Repo:
505+
case *query.RepoSet, *query.RepoIDs, *query.Repo, *query.Meta:
490506
and.Children[i] = &query.Const{Value: true}
491507
return filtered, query.Simplify(and)
492508

search/shards_test.go

Lines changed: 176 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,182 @@ func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
387387
}
388388
}
389389

390+
func TestFilteringShardsByMeta(t *testing.T) {
391+
ss := newShardedSearcher(1)
392+
393+
// Create repos with different metadata values
394+
// We'll create 30 repos total:
395+
// - 10 with nickname="project-A"
396+
// - 10 with nickname="project-B"
397+
// - 10 with no metadata
398+
n := 30
399+
projectARepos := []string{}
400+
projectBRepos := []string{}
401+
402+
// Common document that will be in all repos
403+
doc := index.Document{
404+
Name: "common.go",
405+
Content: []byte("needle haystack"),
406+
}
407+
408+
for i := range n {
409+
shardName := fmt.Sprintf("shard%d", i)
410+
repoName := fmt.Sprintf("repository%.3d", i)
411+
412+
var metadata map[string]string
413+
if i < 10 {
414+
// First 10 repos have project-A
415+
metadata = map[string]string{"nickname": "project-A", "visibility": "public"}
416+
projectARepos = append(projectARepos, repoName)
417+
} else if i < 20 {
418+
// Next 10 repos have project-B
419+
metadata = map[string]string{"nickname": "project-B", "visibility": "private"}
420+
projectBRepos = append(projectBRepos, repoName)
421+
}
422+
// Last 10 repos have no metadata
423+
424+
repo := &zoekt.Repository{
425+
ID: uint32(i + 1),
426+
Name: repoName,
427+
Metadata: metadata,
428+
}
429+
430+
ss.replace(map[string]zoekt.Searcher{
431+
shardName: searcherForTest(t, testShardBuilder(t, repo, doc)),
432+
})
433+
}
434+
435+
// Test 1: Search without Meta filter - should search all shards
436+
res, err := ss.Search(context.Background(), &query.Substring{Pattern: "needle"}, &zoekt.SearchOptions{})
437+
if err != nil {
438+
t.Fatalf("Search without filter: %v", err)
439+
}
440+
if len(res.Files) != n {
441+
t.Fatalf("no meta filter: got %d results, want %d", len(res.Files), n)
442+
}
443+
444+
sub := &query.Substring{Pattern: "needle"}
445+
446+
// Helper function to extract unique repo names from search results
447+
getRepoNames := func(files []zoekt.FileMatch) []string {
448+
repoSet := make(map[string]struct{})
449+
for _, f := range files {
450+
repoSet[f.Repository] = struct{}{}
451+
}
452+
repos := make([]string, 0, len(repoSet))
453+
for repo := range repoSet {
454+
repos = append(repos, repo)
455+
}
456+
sort.Strings(repos)
457+
return repos
458+
}
459+
460+
// Test 2: Filter by nickname="project-A" - should only search 10 shards
461+
metaQueryA := &query.Meta{
462+
Field: "nickname",
463+
Value: regexp.MustCompile("^project-A$"),
464+
}
465+
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryA, sub), &zoekt.SearchOptions{})
466+
if err != nil {
467+
t.Fatalf("Search with Meta filter A: %v", err)
468+
}
469+
gotRepos := getRepoNames(res.Files)
470+
wantRepos := append([]string{}, projectARepos...)
471+
sort.Strings(wantRepos)
472+
if !reflect.DeepEqual(gotRepos, wantRepos) {
473+
t.Fatalf("Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
474+
}
475+
476+
// Test 3: Filter by nickname="project-B" - should only search 10 shards
477+
metaQueryB := &query.Meta{
478+
Field: "nickname",
479+
Value: regexp.MustCompile("^project-B$"),
480+
}
481+
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryB, sub), &zoekt.SearchOptions{})
482+
if err != nil {
483+
t.Fatalf("Search with Meta filter B: %v", err)
484+
}
485+
gotRepos = getRepoNames(res.Files)
486+
wantRepos = append([]string{}, projectBRepos...)
487+
sort.Strings(wantRepos)
488+
if !reflect.DeepEqual(gotRepos, wantRepos) {
489+
t.Fatalf("Meta(nickname=project-B):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
490+
}
491+
492+
// Test 4: Filter by visibility="public" - should only search 10 shards (project-A repos)
493+
metaQueryPublic := &query.Meta{
494+
Field: "visibility",
495+
Value: regexp.MustCompile("^public$"),
496+
}
497+
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryPublic, sub), &zoekt.SearchOptions{})
498+
if err != nil {
499+
t.Fatalf("Search with Meta filter public: %v", err)
500+
}
501+
gotRepos = getRepoNames(res.Files)
502+
wantRepos = append([]string{}, projectARepos...)
503+
sort.Strings(wantRepos)
504+
if !reflect.DeepEqual(gotRepos, wantRepos) {
505+
t.Fatalf("Meta(visibility=public):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
506+
}
507+
508+
// Test 5: Filter by non-existent field - should return 0 results
509+
metaQueryNonExistent := &query.Meta{
510+
Field: "nonexistent_field",
511+
Value: regexp.MustCompile(".*"),
512+
}
513+
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryNonExistent, sub), &zoekt.SearchOptions{})
514+
if err != nil {
515+
t.Fatalf("Search with Meta filter non-existent: %v", err)
516+
}
517+
if len(res.Files) != 0 {
518+
t.Fatalf("Meta(nonexistent_field): got %d results, want 0", len(res.Files))
519+
}
520+
521+
// Test 6: Filter by regex pattern matching multiple values
522+
metaQueryRegex := &query.Meta{
523+
Field: "nickname",
524+
Value: regexp.MustCompile("project-.*"), // Matches both project-A and project-B
525+
}
526+
res, err = ss.Search(context.Background(), query.NewAnd(metaQueryRegex, sub), &zoekt.SearchOptions{})
527+
if err != nil {
528+
t.Fatalf("Search with Meta regex filter: %v", err)
529+
}
530+
gotRepos = getRepoNames(res.Files)
531+
wantRepos = append(append([]string{}, projectARepos...), projectBRepos...)
532+
sort.Strings(wantRepos)
533+
if !reflect.DeepEqual(gotRepos, wantRepos) {
534+
t.Fatalf("Meta(nickname=project-.*):\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
535+
}
536+
537+
// Test 7: Test that Meta query alone (without content search) works
538+
res, err = ss.Search(context.Background(), metaQueryA, &zoekt.SearchOptions{})
539+
if err != nil {
540+
t.Fatalf("Search with Meta query alone: %v", err)
541+
}
542+
gotRepos = getRepoNames(res.Files)
543+
wantRepos = append([]string{}, projectARepos...)
544+
sort.Strings(wantRepos)
545+
if !reflect.DeepEqual(gotRepos, wantRepos) {
546+
t.Fatalf("Meta query alone:\ngot repos: %v\nwant repos: %v", gotRepos, wantRepos)
547+
}
548+
549+
// Test 8: Test with List operation (not just Search)
550+
listRes, err := ss.List(context.Background(), metaQueryA, nil)
551+
if err != nil {
552+
t.Fatalf("List with Meta filter: %v", err)
553+
}
554+
gotListRepos := make([]string, len(listRes.Repos))
555+
for i, r := range listRes.Repos {
556+
gotListRepos[i] = r.Repository.Name
557+
}
558+
sort.Strings(gotListRepos)
559+
wantRepos = append([]string{}, projectARepos...)
560+
sort.Strings(wantRepos)
561+
if !reflect.DeepEqual(gotListRepos, wantRepos) {
562+
t.Fatalf("List with Meta(nickname=project-A):\ngot repos: %v\nwant repos: %v", gotListRepos, wantRepos)
563+
}
564+
}
565+
390566
func hash(name string) uint32 {
391567
h := fnv.New32()
392568
h.Write([]byte(name))

0 commit comments

Comments
 (0)