Skip to content

Commit 90faf6d

Browse files
authored
Add support for indexing submodules without repocache (#985)
* Fix git.Open() fs argument for .git dir instead of worktree The filesystem argument submitted to git.Open() is of the .git directory, it should be for the worktree, as in go-git this logic is copied from: https://github.com/go-git/go-git/blob/145daf2492dd86b1d7e7787555ebd9837b93bff8/repository.go#L373-L375 One side effect of this bug was that go-git would refuse to fetch repo submodules * Fix missing RepoURLs and LineFragments for result subrepositories RepoURLs for subrepositories are currently filtered out when results are streamed per-repo from shards. * Add submodule indexing without repo cache When not using repo cache, index git submodules recursively into subrepos using the go-git API * Adjust subrepo naming conventions - Revert repo-cache behavior to unchanged - Add support for naming repos and modules with no remote (use the dirname for the former and subrepopath for the latter) - Add test for submodule no-repo-cache behavior * Fix overallocation in repoURL and lineFragment filtering * Revert basename treatment from 08f6759 Missed the fact that it is already done in zoekt-git-index command, the test was improperly configured https://github.com/sourcegraph/zoekt/blob/4e4a529c3b63c7d4c7897ba736f1cd52cc163134/cmd/zoekt-git-index/main.go#L93-L100
1 parent e1cef6f commit 90faf6d

File tree

4 files changed

+207
-10
lines changed

4 files changed

+207
-10
lines changed

gitindex/index.go

Lines changed: 108 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,44 @@ func setTemplatesFromConfig(desc *zoekt.Repository, repoDir string) error {
282282
return nil
283283
}
284284

285+
// This attempts to get a repo URL similar to the main repository template processing as in setTemplatesFromConfig()
286+
func normalizeSubmoduleRemoteURL(cfg *config.Config) (string, error) {
287+
sec := cfg.Raw.Section("zoekt")
288+
remoteURL := sec.Options.Get("web-url")
289+
if remoteURL == "" {
290+
// fall back to "origin" remote
291+
remoteURL = configLookupRemoteURL(cfg, "origin")
292+
if remoteURL == "" {
293+
return "", nil
294+
}
295+
}
296+
297+
if sm := sshRelativeURLRegexp.FindStringSubmatch(remoteURL); sm != nil {
298+
user := sm[1]
299+
host := sm[2]
300+
path := sm[3]
301+
302+
remoteURL = fmt.Sprintf("ssh+git://%s@%s/%s", user, host, path)
303+
}
304+
305+
u, err := url.Parse(remoteURL)
306+
if err != nil {
307+
return "", fmt.Errorf("unable to parse remote URL %q: %w", remoteURL, err)
308+
}
309+
310+
if u.Scheme == "ssh+git" {
311+
u.Scheme = "https"
312+
u.User = nil
313+
}
314+
315+
// Assume we cannot build templates for this URL, leave it empty
316+
if u.Scheme == "" {
317+
return "", nil
318+
}
319+
320+
return u.String(), nil
321+
}
322+
285323
// SetTemplatesFromOrigin fills in templates based on the origin URL.
286324
func SetTemplatesFromOrigin(desc *zoekt.Repository, u *url.URL) error {
287325
desc.Name = filepath.Join(u.Host, strings.TrimSuffix(u.Path, ".git"))
@@ -495,8 +533,13 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
495533
tpl := opts.BuildOptions.RepositoryDescription
496534
if path != "" {
497535
tpl = zoekt.Repository{URL: info.URL.String()}
498-
if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil {
499-
log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err)
536+
if info.URL.String() != "" {
537+
if err := SetTemplatesFromOrigin(&tpl, info.URL); err != nil {
538+
log.Printf("setTemplatesFromOrigin(%s, %s): %s", path, info.URL, err)
539+
}
540+
}
541+
if tpl.Name == "" {
542+
tpl.Name = path
500543
}
501544
}
502545
opts.BuildOptions.SubRepositories[path] = &tpl
@@ -569,6 +612,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
569612
// It copies the relevant logic from git.PlainOpen, and tweaks certain filesystem options.
570613
func openRepo(repoDir string) (*git.Repository, io.Closer, error) {
571614
fs := osfs.New(repoDir)
615+
wt := fs
572616

573617
// Check if the root directory exists.
574618
if _, err := fs.Stat(""); err != nil {
@@ -591,7 +635,7 @@ func openRepo(repoDir string) (*git.Repository, io.Closer, error) {
591635
})
592636

593637
// Because we're keeping descriptors open, we need to close the storage object when we're done.
594-
repo, err := git.Open(s, fs)
638+
repo, err := git.Open(s, wt)
595639
return repo, s, err
596640
}
597641

@@ -830,10 +874,13 @@ func prepareDeltaBuild(options Options, repository *git.Repository) (repos map[f
830874

831875
func prepareNormalBuild(options Options, repository *git.Repository) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) {
832876
var repoCache *RepoCache
833-
if options.Submodules {
877+
if options.Submodules && options.RepoCacheDir != "" {
834878
repoCache = NewRepoCache(options.RepoCacheDir)
835879
}
880+
return prepareNormalBuildRecurse(options, repository, repoCache, false)
881+
}
836882

883+
func prepareNormalBuildRecurse(options Options, repository *git.Repository, repoCache *RepoCache, isSubrepo bool) (repos map[fileKey]BlobLocation, branchVersions map[string]map[string]plumbing.Hash, err error) {
837884
// Branch => Repo => SHA1
838885
branchVersions = map[string]map[string]plumbing.Hash{}
839886

@@ -842,7 +889,22 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[
842889
return nil, nil, fmt.Errorf("expandBranches: %w", err)
843890
}
844891

845-
rw := NewRepoWalker(repository, options.BuildOptions.RepositoryDescription.URL, repoCache)
892+
repoURL := options.BuildOptions.RepositoryDescription.URL
893+
894+
if isSubrepo {
895+
cfg, err := repository.Config()
896+
if err != nil {
897+
return nil, nil, fmt.Errorf("unable to get repository config: %w", err)
898+
}
899+
900+
u, err := normalizeSubmoduleRemoteURL(cfg)
901+
if err != nil {
902+
return nil, nil, fmt.Errorf("failed to identify subrepository URL: %w", err)
903+
}
904+
repoURL = u
905+
}
906+
907+
rw := NewRepoWalker(repository, repoURL, repoCache)
846908
for _, b := range branches {
847909
commit, err := getCommit(repository, options.BranchPrefix, b)
848910
if err != nil {
@@ -871,6 +933,47 @@ func prepareNormalBuild(options Options, repository *git.Repository) (repos map[
871933
branchVersions[b] = subVersions
872934
}
873935

936+
// Index submodules using go-git if we didn't do so using the repo cache
937+
if options.Submodules && options.RepoCacheDir == "" {
938+
worktree, err := repository.Worktree()
939+
if err != nil {
940+
return nil, nil, fmt.Errorf("failed to get repository worktree: %w", err)
941+
}
942+
943+
submodules, err := worktree.Submodules()
944+
if err != nil {
945+
return nil, nil, fmt.Errorf("failed to get submodules: %w", err)
946+
}
947+
948+
for _, submodule := range submodules {
949+
subRepository, err := submodule.Repository()
950+
if err != nil {
951+
log.Printf("failed to open submodule repository: %s, %s", submodule.Config().Name, err)
952+
continue
953+
}
954+
955+
sw, subVersions, err := prepareNormalBuildRecurse(options, subRepository, repoCache, true)
956+
if err != nil {
957+
log.Printf("failed to index submodule repository: %s, %s", submodule.Config().Name, err)
958+
continue
959+
}
960+
961+
log.Printf("adding subrepository files from: %s", submodule.Config().Name)
962+
963+
for k, repo := range sw {
964+
rw.Files[fileKey{
965+
SubRepoPath: filepath.Join(submodule.Config().Path, k.SubRepoPath),
966+
Path: k.Path,
967+
ID: k.ID,
968+
}] = repo
969+
}
970+
971+
for k, v := range subVersions {
972+
branchVersions[filepath.Join(submodule.Config().Path, k)] = v
973+
}
974+
}
975+
}
976+
874977
return rw.Files, branchVersions, nil
875978
}
876979

gitindex/tree.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,16 @@ func (rw *RepoWalker) handleSubmodule(p string, id *plumbing.Hash, branch string
178178
}
179179

180180
func (rw *RepoWalker) handleEntry(p string, e *object.TreeEntry, branch string, subRepoVersions map[string]plumbing.Hash, ig *ignore.Matcher) error {
181-
if e.Mode == filemode.Submodule && rw.repoCache != nil {
182-
if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil {
183-
return fmt.Errorf("submodule %s: %v", p, err)
181+
if e.Mode == filemode.Submodule {
182+
if rw.repoCache != nil {
183+
// Index the submodule using repo cache
184+
if err := rw.tryHandleSubmodule(p, &e.Hash, branch, subRepoVersions, ig); err != nil {
185+
return fmt.Errorf("submodule %s: %v", p, err)
186+
}
187+
} else {
188+
// Record the commit ID for the submodule path
189+
// This will be the submodule's commit hash, not the parent's
190+
subRepoVersions[p] = e.Hash
184191
}
185192
}
186193

gitindex/tree_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,70 @@ func TestSubmoduleIndex(t *testing.T) {
258258
}
259259
}
260260

261+
func TestSubmoduleIndexWithoutRepocache(t *testing.T) {
262+
dir := t.TempDir()
263+
264+
if err := createSubmoduleRepo(dir); err != nil {
265+
t.Fatalf("createSubmoduleRepo: %v", err)
266+
}
267+
268+
indexDir := t.TempDir()
269+
270+
buildOpts := index.Options{
271+
RepositoryDescription: zoekt.Repository{Name: "adir"},
272+
IndexDir: indexDir,
273+
}
274+
opts := Options{
275+
RepoDir: filepath.Join(dir, "adir"),
276+
BuildOptions: buildOpts,
277+
BranchPrefix: "refs/heads/",
278+
Branches: []string{"master"},
279+
Submodules: true,
280+
Incremental: true,
281+
}
282+
if _, err := IndexGitRepo(opts); err != nil {
283+
t.Fatalf("IndexGitRepo: %v", err)
284+
}
285+
286+
searcher, err := search.NewDirectorySearcher(indexDir)
287+
if err != nil {
288+
t.Fatal("NewDirectorySearcher", err)
289+
}
290+
defer searcher.Close()
291+
292+
results, err := searcher.Search(context.Background(),
293+
&query.Substring{Pattern: "bcont"},
294+
&zoekt.SearchOptions{})
295+
if err != nil {
296+
t.Fatal("Search", err)
297+
}
298+
299+
if len(results.Files) != 1 {
300+
t.Fatalf("got search result %v, want 1 file", results.Files)
301+
}
302+
303+
file := results.Files[0]
304+
if got, want := file.SubRepositoryName, "bname"; got != want {
305+
t.Errorf("got subrepo name %q, want %q", got, want)
306+
}
307+
if got, want := file.SubRepositoryPath, "bname"; got != want {
308+
t.Errorf("got subrepo path %q, want %q", got, want)
309+
}
310+
311+
subVersion := file.Version
312+
if len(subVersion) != 40 {
313+
t.Fatalf("got %q, want hex sha1", subVersion)
314+
}
315+
316+
if results, err := searcher.Search(context.Background(), &query.Substring{Pattern: "acont"}, &zoekt.SearchOptions{}); err != nil {
317+
t.Fatalf("Search('acont'): %v", err)
318+
} else if len(results.Files) != 1 {
319+
t.Errorf("got %v, want 1 result", results.Files)
320+
} else if f := results.Files[0]; f.Version == subVersion {
321+
t.Errorf("version in super repo matched version is subrepo.")
322+
}
323+
}
324+
261325
func createSymlinkRepo(dir string) error {
262326
if err := os.MkdirAll(dir, 0o755); err != nil {
263327
return err

search/shards.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -816,15 +816,38 @@ func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sen
816816

817817
send := func(repoName string, a, b int, stats zoekt.Stats) {
818818
index.SortFiles(result.Files[a:b])
819+
820+
filteredRepoURLs := map[string]string{repoName: result.RepoURLs[repoName]}
821+
filteredLineFragments := map[string]string{repoName: result.LineFragments[repoName]}
822+
823+
// Filter RepoURLs and LineFragments to only those of repoName and its
824+
// subRepositories if there are subRepositories
825+
for _, file := range result.Files[a:b] {
826+
name := file.SubRepositoryName
827+
if name == "" {
828+
continue
829+
}
830+
_, repoSet := filteredRepoURLs[name]
831+
url, ok := result.RepoURLs[name]
832+
if !repoSet && ok {
833+
filteredRepoURLs[name] = url
834+
}
835+
_, fragSet := filteredLineFragments[name]
836+
frag, ok := result.LineFragments[name]
837+
if !fragSet && ok {
838+
filteredLineFragments[name] = frag
839+
}
840+
}
841+
819842
sender.Send(&zoekt.SearchResult{
820843
Stats: stats,
821844
Progress: zoekt.Progress{
822845
Priority: result.Files[a].RepositoryPriority,
823846
MaxPendingPriority: result.MaxPendingPriority,
824847
},
825848
Files: result.Files[a:b],
826-
RepoURLs: map[string]string{repoName: result.RepoURLs[repoName]},
827-
LineFragments: map[string]string{repoName: result.LineFragments[repoName]},
849+
RepoURLs: filteredRepoURLs,
850+
LineFragments: filteredLineFragments,
828851
})
829852
}
830853

0 commit comments

Comments
 (0)