Skip to content

Commit e88cece

Browse files
fix: limit the size of the git clone (#1111)
* fix: limit the size of the git clone

  fix: set a max repo size while cloning

  Signed-off-by: Harikrishnan Balagopal <[email protected]>

* chore: update go version to be able to use atomic counters atomic.Int64

  Signed-off-by: Harikrishnan Balagopal <[email protected]>

* fixup! chore: update go version to be able to use atomic counters atomic.Int64

  Signed-off-by: Harikrishnan Balagopal <[email protected]>

---------

Signed-off-by: Harikrishnan Balagopal <[email protected]>
1 parent af4d50b commit e88cece

File tree

13 files changed

+281
-149
lines changed

13 files changed

+281
-149
lines changed

cmd/flags.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ const (
5252
qadisablecliFlag = "qa-disable-cli"
5353
qaportFlag = "qa-port"
5454
planProgressPortFlag = "plan-progress-port"
55+
maxCloneSizeBytesFlag = "max-clone-size"
5556
transformerSelectorFlag = "transformer-selector"
5657
qaEnabledCategoriesFlag = "qa-enable"
5758
qaDisabledCategoriesFlag = "qa-disable"

cmd/plan.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
)
3737

3838
type planFlags struct {
39+
maxVCSRepoCloneSize int64
3940
progressServerPort int
4041
planfile string
4142
srcpath string
@@ -65,6 +66,8 @@ func planHandler(cmd *cobra.Command, flags planFlags) {
6566
}()
6667
defer lib.Destroy()
6768

69+
vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)
70+
6871
var err error
6972
planfile := flags.planfile
7073
srcpath := flags.srcpath
@@ -182,6 +185,7 @@ func GetPlanCommand() *cobra.Command {
182185
planCmd.Flags().StringSliceVar(&flags.preSets, preSetFlag, []string{}, "Specify preset config to use.")
183186
planCmd.Flags().StringArrayVar(&flags.setconfigs, setConfigFlag, []string{}, "Specify config key-value pairs.")
184187
planCmd.Flags().IntVar(&flags.progressServerPort, planProgressPortFlag, 0, "Port for the plan progress server. If not provided, the server won't be started.")
188+
planCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")
185189
planCmd.Flags().BoolVar(&flags.disableLocalExecution, common.DisableLocalExecutionFlag, false, "Allow files to be executed locally.")
186190
planCmd.Flags().BoolVar(&flags.failOnEmptyPlan, common.FailOnEmptyPlan, false, "If true, planning will exit with a failure exit code if no services are detected (and no default transformers are found).")
187191

cmd/transform.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ import (
3838

3939
type transformFlags struct {
4040
qaflags
41+
// maxVCSRepoCloneSize is the maximum size in bytes for cloning repos
42+
maxVCSRepoCloneSize int64
4143
// ignoreEnv tells us whether to use data collected from the local machine
4244
ignoreEnv bool
4345
// disableLocalExecution disables execution of executables locally
@@ -72,6 +74,7 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
7274
}
7375
defer pprof.StopCPUProfile()
7476
}
77+
vcs.SetMaxRepoCloneSize(flags.maxVCSRepoCloneSize)
7578

7679
ctx, cancel := context.WithCancel(cmd.Context())
7780
logrus.AddHook(common.NewCleanupHook(cancel))
@@ -250,7 +253,14 @@ func transformHandler(cmd *cobra.Command, flags transformFlags) {
250253
}
251254
startQA(flags.qaflags)
252255
}
253-
if err := lib.Transform(ctx, transformationPlan, preExistingPlan, flags.outpath, flags.transformerSelector, flags.maxIterations); err != nil {
256+
if err := lib.Transform(
257+
ctx,
258+
transformationPlan,
259+
preExistingPlan,
260+
flags.outpath,
261+
flags.transformerSelector,
262+
flags.maxIterations,
263+
); err != nil {
254264
logrus.Fatalf("failed to transform. Error: %q", err)
255265
}
256266
logrus.Infof("Transformed target artifacts can be found at [%s].", flags.outpath)
@@ -290,6 +300,7 @@ func GetTransformCommand() *cobra.Command {
290300
transformCmd.Flags().StringVarP(&flags.customizationsPath, customizationsFlag, "c", "", "Specify directory or a git url (see https://move2kube.konveyor.io/concepts/git-support) where customizations are stored. By default we look for "+common.DefaultCustomizationDir)
291301
transformCmd.Flags().StringVarP(&flags.transformerSelector, transformerSelectorFlag, "t", "", "Specify the transformer selector.")
292302
transformCmd.Flags().BoolVar(&flags.qaskip, qaSkipFlag, false, "Enable/disable the default answers to questions posed in QA Cli sub-system. If disabled, you will have to answer the questions posed by QA during interaction.")
303+
transformCmd.Flags().Int64Var(&flags.maxVCSRepoCloneSize, maxCloneSizeBytesFlag, -1, "Max size in bytes when cloning a git repo. Default -1 is infinite")
293304

294305
// QA options
295306
transformCmd.Flags().StringSliceVar(&flags.qaEnabledCategories, qaEnabledCategoriesFlag, []string{}, "Specify the QA categories to enable (cannot be used in conjunction with qa-disable)")

common/vcs/git.go

Lines changed: 71 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,16 @@ import (
2424
"strings"
2525
"time"
2626

27+
"github.com/go-git/go-billy/v5"
28+
"github.com/go-git/go-billy/v5/osfs"
2729
"github.com/go-git/go-git/v5"
2830
"github.com/go-git/go-git/v5/config"
2931
"github.com/go-git/go-git/v5/plumbing"
32+
"github.com/go-git/go-git/v5/plumbing/cache"
3033
"github.com/go-git/go-git/v5/plumbing/object"
3134
"github.com/go-git/go-git/v5/plumbing/transport/http"
3235
"github.com/go-git/go-git/v5/plumbing/transport/ssh"
36+
"github.com/go-git/go-git/v5/storage/filesystem"
3337
"github.com/konveyor/move2kube/common"
3438
"github.com/konveyor/move2kube/qaengine"
3539
"github.com/sirupsen/logrus"
@@ -47,6 +51,11 @@ type GitVCSRepo struct {
4751
GitRepoPath string
4852
}
4953

54+
var (
55+
// for https or ssh git repo urls
56+
gitVCSRegex = regexp.MustCompile(`^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`)
57+
)
58+
5059
func isGitCommitHash(commithash string) bool {
5160
gitCommitHashRegex := regexp.MustCompile(`^[a-fA-F0-9]{40}$`)
5261
return gitCommitHashRegex.MatchString(commithash)
@@ -112,26 +121,23 @@ func getGitRepoStruct(vcsurl string) (*GitVCSRepo, error) {
112121

113122
}
114123

115-
// isGitVCS checks if the given vcs url is git
124+
// isGitVCS checks if the given vcs url is a git repo url
116125
func isGitVCS(vcsurl string) bool {
117-
// for https or ssh
118-
gitVCSRegex := `^git\+(https|ssh)://[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,5}(:[0-9]{1,5})?(\/.*)?$`
119-
matched, err := regexp.MatchString(gitVCSRegex, vcsurl)
120-
if err != nil {
121-
logrus.Fatalf("failed to match the given vcsurl %v with the git vcs regex expression %v. Error : %v", vcsurl, gitVCSRegex, err)
122-
}
123-
return matched
126+
return gitVCSRegex.MatchString(vcsurl)
124127
}
125128

126-
func pushGitVCS(remotePath, folderName string) error {
129+
func pushGitVCS(remotePath, folderName string, maxSize int64) error {
127130
if !common.IgnoreEnvironment {
128131
logrus.Warnf("push to remote git repositories using credentials from the environment is not yet supported.")
129132
}
130133
remotePathSplitByAt := strings.Split(remotePath, "@")
131134
remotePathSplitByColon := strings.Split(remotePathSplitByAt[0], ":")
132135
isSSH := strings.HasPrefix(remotePath, "git+ssh")
133136
isHTTPS := strings.HasPrefix(remotePath, "git+https")
134-
gitFSPath := GetClonedPath(remotePath, folderName, false)
137+
gitFSPath, err := GetClonedPath(remotePath, folderName, false)
138+
if err != nil {
139+
return fmt.Errorf("failed to clone the repo. Error: %w", err)
140+
}
135141
if (isHTTPS && len(remotePathSplitByColon) > 2) || (isSSH && len(remotePathSplitByColon) > 2) {
136142
gitFSPath = strings.TrimSuffix(gitFSPath, remotePathSplitByColon[len(remotePathSplitByColon)-1])
137143
}
@@ -202,60 +208,73 @@ func pushGitVCS(remotePath, folderName string) error {
202208
return nil
203209
}
204210

205-
// Clone Clones a git repository with the given commit depth and path where to be cloned and returns final path
206-
func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, error) {
207-
208-
if gitCloneOptions.CloneDestinationPath == "" {
209-
return "", fmt.Errorf("the path where the repository has to be clone is empty - %s", gitCloneOptions.CloneDestinationPath)
211+
// Clone clones a git repository with the given commit depth
212+
// and path where it is to be cloned and returns the final path inside the repo
213+
func (gvcsrepo *GitVCSRepo) Clone(cloneOptions VCSCloneOptions) (string, error) {
214+
if cloneOptions.CloneDestinationPath == "" {
215+
return "", fmt.Errorf("the path where the repository has to be cloned cannot be empty")
210216
}
211-
repoPath := filepath.Join(gitCloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
212-
_, err := os.Stat(repoPath)
213-
if os.IsNotExist(err) {
214-
logrus.Debugf("cloned output would be available at '%s'", repoPath)
215-
} else if gitCloneOptions.Overwrite {
216-
logrus.Infof("git repository might get overwritten at %s", repoPath)
217-
err = os.RemoveAll(repoPath)
218-
if err != nil {
219-
return "", fmt.Errorf("failed to remove the directory at the given path - %s", repoPath)
217+
repoPath := filepath.Join(cloneOptions.CloneDestinationPath, gvcsrepo.GitRepoPath)
218+
repoDirInfo, err := os.Stat(repoPath)
219+
if err != nil {
220+
if !os.IsNotExist(err) {
221+
return "", fmt.Errorf("failed to stat the git repo clone destination path '%s'. error: %w", repoPath, err)
220222
}
223+
logrus.Debugf("the cloned git repo will be available at '%s'", repoPath)
221224
} else {
222-
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
225+
if !cloneOptions.Overwrite {
226+
if !repoDirInfo.IsDir() {
227+
return "", fmt.Errorf("a file already exists at the git repo clone destination path '%s'", repoPath)
228+
}
229+
logrus.Infof("Assuming that the directory at '%s' is the cloned repo", repoPath)
230+
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil
231+
}
232+
logrus.Infof("git repository clone will overwrite the files/directories at '%s'", repoPath)
233+
if err := os.RemoveAll(repoPath); err != nil {
234+
return "", fmt.Errorf("failed to remove the files/directories at '%s' . error: %w", repoPath, err)
235+
}
236+
}
237+
logrus.Infof("Cloning the repository using git into '%s' . This might take some time.", cloneOptions.CloneDestinationPath)
238+
239+
// ------------
240+
var repoDirWt, dotGitDir billy.Filesystem
241+
repoDirWt = osfs.New(repoPath)
242+
dotGitDir, _ = repoDirWt.Chroot(git.GitDirName)
243+
fStorer := filesystem.NewStorage(dotGitDir, cache.NewObjectLRUDefault())
244+
limitStorer := Limit(fStorer, cloneOptions.MaxSize)
245+
// ------------
246+
247+
commitDepth := 1
248+
if cloneOptions.CommitDepth != 0 {
249+
commitDepth = cloneOptions.CommitDepth
223250
}
224-
logrus.Infof("Cloning the repository using git into %s. This might take some time.", gitCloneOptions.CloneDestinationPath)
225251
if gvcsrepo.Branch != "" {
226-
commitDepth := 1
227-
if gitCloneOptions.CommitDepth != 0 {
228-
commitDepth = gitCloneOptions.CommitDepth
229-
}
230252
cloneOpts := git.CloneOptions{
231253
URL: gvcsrepo.URL,
232254
Depth: commitDepth,
233255
SingleBranch: true,
234256
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)),
235257
}
236-
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
258+
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
237259
if err != nil {
238-
logrus.Debugf("provided branch %+v does not exist in the remote, therefore creating one.", gvcsrepo.Branch)
260+
logrus.Debugf("failed to clone the given branch '%s' . Will clone the entire repo and try again.", gvcsrepo.Branch)
239261
cloneOpts := git.CloneOptions{
240262
URL: gvcsrepo.URL,
241263
Depth: commitDepth,
242264
}
243-
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
265+
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
244266
if err != nil {
245-
return "", fmt.Errorf("failed to perform clone operation using git with options. Error : %+v", err)
267+
return "", fmt.Errorf("failed to perform clone operation using git. Error: %w", err)
246268
}
247269
branch := fmt.Sprintf("refs/heads/%s", gvcsrepo.Branch)
248270
b := plumbing.ReferenceName(branch)
249271
w, err := gvcsrepo.GitRepository.Worktree()
250272
if err != nil {
251-
return "", fmt.Errorf("failed return a worktree for the repostiory. Error : %+v", err)
273+
return "", fmt.Errorf("failed return a worktree for the repostiory. Error: %w", err)
252274
}
253-
254-
err = w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b})
255-
256-
if err != nil {
257-
err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b})
258-
if err != nil {
275+
if err := w.Checkout(&git.CheckoutOptions{Create: false, Force: false, Branch: b}); err != nil {
276+
logrus.Debugf("failed to checkout the branch '%s', creating it...", b)
277+
if err := w.Checkout(&git.CheckoutOptions{Create: true, Force: false, Branch: b}); err != nil {
259278
return "", fmt.Errorf("failed checkout a new branch. Error : %+v", err)
260279
}
261280
}
@@ -265,45 +284,41 @@ func (gvcsrepo *GitVCSRepo) Clone(gitCloneOptions VCSCloneOptions) (string, erro
265284
cloneOpts := git.CloneOptions{
266285
URL: gvcsrepo.URL,
267286
}
268-
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
287+
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
269288
if err != nil {
270-
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
289+
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
271290
}
272291
r, err := git.PlainOpen(repoPath)
273292
if err != nil {
274-
return "", fmt.Errorf("failed to open the git repository at the given path %+v. Error : %+v", repoPath, err)
293+
return "", fmt.Errorf("failed to open the git repository at the given path '%s' . Error: %w", repoPath, err)
275294
}
276295
w, err := r.Worktree()
277296
if err != nil {
278-
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error : %+v", r, err)
297+
return "", fmt.Errorf("failed return a worktree for the repostiory %+v. Error: %w", r, err)
279298
}
280-
checkoutOpts := git.CheckoutOptions{
281-
Hash: commitHash,
282-
}
283-
err = w.Checkout(&checkoutOpts)
284-
if err != nil {
285-
return "", fmt.Errorf("failed to checkout commit hash : %s on work tree. Error : %+v", commitHash, w)
299+
checkoutOpts := git.CheckoutOptions{Hash: commitHash}
300+
if err := w.Checkout(&checkoutOpts); err != nil {
301+
return "", fmt.Errorf("failed to checkout commit hash '%s' on work tree. Error: %w", commitHash, err)
286302
}
287303
} else if gvcsrepo.Tag != "" {
288304
cloneOpts := git.CloneOptions{
289305
URL: gvcsrepo.URL,
290306
ReferenceName: plumbing.ReferenceName(fmt.Sprintf("refs/tags/%s", gvcsrepo.Tag)),
291307
}
292-
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
308+
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
293309
if err != nil {
294-
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
310+
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error: %w", cloneOpts, err)
295311
}
296312
} else {
297-
commitDepth := 1
298313
cloneOpts := git.CloneOptions{
299314
URL: gvcsrepo.URL,
300315
Depth: commitDepth,
301316
SingleBranch: true,
302317
ReferenceName: "refs/heads/main",
303318
}
304-
gvcsrepo.GitRepository, err = git.PlainClone(repoPath, false, &cloneOpts)
319+
gvcsrepo.GitRepository, err = git.Clone(limitStorer, repoDirWt, &cloneOpts)
305320
if err != nil {
306-
return "", fmt.Errorf("failed to perform clone operation using git with options %+v. Error : %+v", cloneOpts, err)
321+
return "", fmt.Errorf("failed to perform clone operation using git with options %+v and %+v. Error: %w", cloneOpts, cloneOptions, err)
307322
}
308323
}
309324
return filepath.Join(repoPath, gvcsrepo.PathWithinRepo), nil

common/vcs/git_test.go

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"testing"
2222

2323
"github.com/google/go-cmp/cmp"
24-
"github.com/konveyor/move2kube/common"
2524
)
2625

2726
func TestIsGitCommitHash(t *testing.T) {
@@ -125,45 +124,47 @@ func TestIsGitVCS(t *testing.T) {
125124
}
126125

127126
func TestClone(t *testing.T) {
128-
// Test case - clone a valid vcs url with overwrite true
127+
t.Log("Test case - clone a valid vcs url with overwrite true")
129128
gitURL := "git+https://github.com/konveyor/move2kube.git"
130129
repo, err := getGitRepoStruct(gitURL)
131130
if err != nil {
132-
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
131+
t.Fatalf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
133132
}
134133
overwrite := true
135-
tempPath, err := filepath.Abs(common.RemoteTempPath)
136-
if err != nil {
137-
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
134+
tempPath := t.TempDir()
135+
cloneDestPath := filepath.Join(tempPath, "test-clone")
136+
var infiniteSize int64 = -1
137+
cloneOpts := VCSCloneOptions{
138+
CommitDepth: 1,
139+
Overwrite: overwrite,
140+
CloneDestinationPath: cloneDestPath,
141+
MaxSize: infiniteSize,
138142
}
139-
folderName := "test-clone"
140-
cloneOpts := VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
141143
clonedPath, err := repo.Clone(cloneOpts)
142144
if err != nil {
143-
t.Errorf("failed to clone the git repo. Error : %+v", err)
145+
t.Fatalf("failed to clone the git repo. Error : %+v", err)
144146
}
145147

146-
// Test case 2 - Repository already exists with overwrite true
148+
t.Log("Test case 2 - Repository already exists with overwrite false")
147149
gitURL = "git+https://github.com/konveyor/move2kube.git"
148150
repo, err = getGitRepoStruct(gitURL)
149151
if err != nil {
150-
t.Errorf("failed to get git repo struct for the given git URL %s. Error : %+v", gitURL, err)
152+
t.Fatalf("failed to get git repo struct for the given git URL '%s' . Error : %+v", gitURL, err)
151153
}
152154
overwrite = false
153-
tempPath, err = filepath.Abs(common.RemoteTempPath)
154-
if err != nil {
155-
t.Errorf("failed to get absolute path of %s. Error : %+v", common.RemoteTempPath, err)
155+
cloneOpts = VCSCloneOptions{
156+
CommitDepth: 1,
157+
Overwrite: overwrite,
158+
CloneDestinationPath: cloneDestPath,
159+
MaxSize: infiniteSize,
156160
}
157-
folderName = "test-clone"
158-
cloneOpts = VCSCloneOptions{CommitDepth: 1, Overwrite: overwrite, CloneDestinationPath: filepath.Join(tempPath, folderName)}
159161
clonedPathWithoutOverwrite, err := repo.Clone(cloneOpts)
160162
if err != nil {
161-
t.Errorf("failed to clone the git repo. Error : %+v", err)
163+
t.Fatalf("failed to clone the git repo. Error : %+v", err)
162164
}
163165
if clonedPath != clonedPathWithoutOverwrite {
164-
t.Errorf("cloned paths did not match with overwrite false. cloned path %s, cloned path without overwrite: %s", clonedPath, clonedPathWithoutOverwrite)
166+
t.Fatalf("cloned paths did not match with overwrite false. cloned path '%s', cloned path without overwrite: '%s'", clonedPath, clonedPathWithoutOverwrite)
165167
}
166-
167168
}
168169

169170
func TestIsGitBranch(t *testing.T) {

0 commit comments

Comments
 (0)