Skip to content

Commit 6e25e87

Browse files
author
Kevin Westphal
committed
feat(git-hooks): set repo remote fallback with env variable
In some configurations ggshield runs in repositories without a configured remote, for example when running in a git pre-receive hook. This commit adds a REPOSITORY_REMOTE_FALLBACK environment variable for setting a fallback value for the remote URL. Issue #1158
1 parent 7218450 commit 6e25e87

File tree

7 files changed

+245
-3
lines changed

7 files changed

+245
-3
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@ GITGUARDIAN_INSTANCE=https://dashboard.gitguardian.com/
1616
# - and set TEST_GG_VALID_TOKEN_IGNORE_SHA to matching commit sha
1717
# TEST_GG_VALID_TOKEN=
1818
# TEST_GG_VALID_TOKEN_IGNORE_SHA=
19+
20+
# Fallback value for the repository remote URL in case it cannot be determined using `git remote -v`
21+
# This variable is particularly relevant when running ggshield in a git pre-receive hook
22+
#REPOSITORY_REMOTE_FALLBACK=
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Removed
10+
11+
- A bullet item for the Removed category.
12+
13+
-->
14+
15+
### Added
16+
17+
- Add `REPOSITORY_REMOTE_FALLBACK` environment variable that allows setting a fallback value for the repository remote.
18+
19+
<!--
20+
### Changed
21+
22+
- A bullet item for the Changed category.
23+
24+
-->
25+
<!--
26+
### Deprecated
27+
28+
- A bullet item for the Deprecated category.
29+
30+
-->
31+
<!--
32+
### Fixed
33+
34+
- A bullet item for the Fixed category.
35+
36+
-->
37+
<!--
38+
### Security
39+
40+
- A bullet item for the Security category.
41+
42+
-->

ggshield/core/env_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"GITGUARDIAN_INSTANCE",
1515
"GITGUARDIAN_API_URL",
1616
"GITGUARDIAN_API_KEY",
17+
"REPOSITORY_REMOTE_FALLBACK",
1718
}
1819

1920
logger = logging.getLogger(__name__)

ggshield/utils/git_shell.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,21 +379,39 @@ def get_repository_url_from_path(wd: Path) -> Optional[str]:
379379
Returns one of the repository remote URLs. Returns None if no remotes are found,
380380
or the directory is not a repository or we don't have git so we can't know if the
381381
directory is a repository.
382+
383+
If REPOSITORY_REMOTE_FALLBACK environment variable is set, it will be used as a
384+
fallback value when no remote URL can be detected from the git repository.
382385
"""
383386
try:
384387
if not is_git_available() or not is_git_dir(wd):
385-
return None
388+
return _get_repository_url_fallback()
386389
remotes_raw = git(["remote", "-v"], cwd=wd).splitlines()
387390
except (subprocess.CalledProcessError, OSError):
388-
return None
391+
return _get_repository_url_fallback()
389392

390393
url: Optional[str] = None
391394
for line in remotes_raw:
392395
if match := re.search(r"^(.*)\t(.*) \(fetch\)$", line):
393396
name, url = match.groups()
394397
if name == "origin":
395398
break
396-
return simplify_git_url(url) if url else None
399+
400+
if url:
401+
return simplify_git_url(url)
402+
403+
return _get_repository_url_fallback()
404+
405+
406+
def _get_repository_url_fallback() -> Optional[str]:
    """
    Return the fallback repository URL configured through the environment.

    Reads the REPOSITORY_REMOTE_FALLBACK environment variable and passes its
    value through `simplify_git_url` before returning it.

    Returns None if the variable is unset, empty, or contains only whitespace.
    """
    # Surrounding whitespace (e.g. left over from a shell script or a .env file)
    # is never part of a URL: drop it so that a blank value counts as "not set"
    # and a padded value is simplified like any other URL.
    url = os.getenv("REPOSITORY_REMOTE_FALLBACK", "").strip()
    if not url:
        return None
    return simplify_git_url(url)
397415

398416

399417
def get_filepaths_from_ref(

tests/unit/cmd/scan/test_path.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,3 +557,47 @@ def test_scan_context_repository(
557557
and arg.get("GGShield-Repository-URL") == "github.com/owner/repository"
558558
for arg in scan_mock.call_args[0]
559559
)
560+
561+
@patch("pygitguardian.GGClient.multi_content_scan")
@my_vcr.use_cassette("test_scan_context_repository.yaml")
def test_scan_path_with_fallback_repository_url(
    self,
    scan_mock: Mock,
    tmp_path: Path,
    cli_fs_runner: CliRunner,
) -> None:
    """
    GIVEN a committed file in a repository that has no remote
    WHEN `secret scan path` runs with REPOSITORY_REMOTE_FALLBACK set
    THEN the fallback value is sent as the GGShield-Repository-URL header
    """
    # The mocked scan call answers with an empty, successful result.
    empty_result = MultiScanResult([])
    empty_result.status_code = 200
    scan_mock.return_value = empty_result

    # Repository with a single committed file and no remote.
    repo = Repository.create(tmp_path)
    secret_file = repo.path / "file_secret"
    write_text(secret_file, "Hello")
    repo.add(secret_file)
    repo.create_commit()

    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    command = ["secret", "scan", "path", str(secret_file)]
    with patch.dict(os.environ, env):
        result = cli_fs_runner.invoke(cli, command)
    assert result.exit_code == ExitCode.SUCCESS, result.output

    scan_mock.assert_called_once()
    # Collect the repository URL from every dict passed positionally to the scan.
    sent_urls = [
        arg.get("GGShield-Repository-URL")
        for arg in scan_mock.call_args[0]
        if isinstance(arg, dict)
    ]
    assert "github.com/fallback/repository" in sent_urls

tests/unit/core/scan/test_scan_context.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,57 @@ def test_ci_no_env(env, fake_url_repo: Repository) -> None:
123123
target_path=fake_url_repo.path,
124124
)
125125
_assert_repo_url_in_headers(context, EXPECTED_HEADER_REMOTE)
126+
127+
128+
def test_fallback_env_var_used_when_no_remote(tmp_path: Path) -> None:
    """
    GIVEN a git repository that has no remote configured
    WHEN a scan context is built while REPOSITORY_REMOTE_FALLBACK is set
    THEN the fallback value is reported as the repository URL in the headers
    """
    remoteless_repo = Repository.create(tmp_path / "repo")
    remoteless_repo.create_commit()

    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with mock.patch.dict(os.environ, env):
        context = ScanContext(
            scan_mode=ScanMode.PATH,
            command_path="ggshield secret scan path",
            target_path=remoteless_repo.path,
        )
        _assert_repo_url_in_headers(context, "github.com/fallback/repository")
146+
147+
148+
def test_fallback_env_var_not_used_when_remote_exists(
    fake_url_repo: Repository,
) -> None:
    """
    GIVEN a git repository that has a remote configured
    WHEN a scan context is built while REPOSITORY_REMOTE_FALLBACK is set
    THEN the headers carry the remote URL rather than the fallback value
    """
    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with mock.patch.dict(os.environ, env):
        context = ScanContext(
            scan_mode=ScanMode.PATH,
            command_path="ggshield secret scan path",
            target_path=fake_url_repo.path,
        )
        _assert_repo_url_in_headers(context, EXPECTED_HEADER_REMOTE)
164+
165+
166+
def test_fallback_env_var_used_when_not_git_dir(tmp_path: Path) -> None:
    """
    GIVEN a plain directory that is not a git repository
    WHEN a scan context is built while REPOSITORY_REMOTE_FALLBACK is set
    THEN the fallback value is reported as the repository URL in the headers
    """
    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with mock.patch.dict(os.environ, env):
        context = ScanContext(
            scan_mode=ScanMode.PATH,
            command_path="ggshield secret scan path",
            target_path=tmp_path,
        )
        _assert_repo_url_in_headers(context, "github.com/fallback/repository")

tests/unit/utils/test_git_shell.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,85 @@ def test_get_repository_url_from_path_subrepo(tmp_path: Path):
292292
assert "repository2" in get_repository_url_from_path(local_repo2.path)
293293

294294

295+
def test_get_repository_url_from_path_with_env_var_fallback(tmp_path: Path):
    """
    GIVEN a repository without a remote
    AND REPOSITORY_REMOTE_FALLBACK set to a URL
    WHEN the repository URL is looked up
    THEN the simplified form of the fallback URL is returned
    """
    repo_dir = tmp_path / "repo"
    Repository.create(repo_dir).create_commit()

    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with patch.dict(os.environ, env):
        detected = get_repository_url_from_path(repo_dir)

    assert detected == "github.com/fallback/repository"
309+
310+
311+
def test_get_repository_url_from_path_env_var_not_used_when_remote_exists(
    tmp_path: Path,
):
    """
    GIVEN a repository with a remote
    AND REPOSITORY_REMOTE_FALLBACK set to a different URL
    WHEN the repository URL is looked up
    THEN the remote URL is returned and the fallback is ignored
    """
    repo_with_remote = _create_repository_with_remote(tmp_path, "repository")

    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with patch.dict(os.environ, env):
        detected = get_repository_url_from_path(repo_with_remote.path)

    assert "repository" in detected
    # The absence of "fallback" is what distinguishes the remote from the env value.
    assert "fallback" not in detected
326+
327+
328+
def test_get_repository_url_from_path_env_var_with_non_git_dir(tmp_path: Path):
    """
    GIVEN a local directory that is not a git repository
    AND REPOSITORY_REMOTE_FALLBACK set to a URL
    WHEN the repository URL is looked up
    THEN the simplified form of the fallback URL is returned
    """
    plain_dir = tmp_path / "local"
    plain_dir.mkdir()

    env = {"REPOSITORY_REMOTE_FALLBACK": "https://github.com/fallback/repository.git"}
    with patch.dict(os.environ, env):
        detected = get_repository_url_from_path(plain_dir)

    assert detected == "github.com/fallback/repository"
341+
342+
343+
def test_get_repository_url_from_path_env_var_empty(tmp_path: Path):
    """
    GIVEN a repository without a remote
    AND REPOSITORY_REMOTE_FALLBACK set to an empty string
    WHEN the repository URL is looked up
    THEN no URL is returned
    """
    repo_dir = tmp_path / "repo"
    Repository.create(repo_dir).create_commit()

    with patch.dict(os.environ, {"REPOSITORY_REMOTE_FALLBACK": ""}):
        detected = get_repository_url_from_path(repo_dir)

    assert detected is None
356+
357+
358+
def test_get_repository_url_from_path_env_var_simplifies_url(tmp_path: Path):
    """
    GIVEN a repository without a remote
    AND REPOSITORY_REMOTE_FALLBACK set to a URL carrying credentials, a port
    and a `.git` suffix
    WHEN the repository URL is looked up
    THEN only the host and path are returned
    """
    local_repository_path = tmp_path / "repo"
    repo = Repository.create(local_repository_path)
    repo.create_commit()

    # The URL must name github.com as host for the expected value below to hold;
    # the user/password part only exists to check that credentials are removed.
    fallback_url = "https://user:password@github.com:84/owner/repo.git"
    with patch.dict(os.environ, {"REPOSITORY_REMOTE_FALLBACK": fallback_url}):
        url = get_repository_url_from_path(local_repository_path)

    assert url == "github.com/owner/repo"
372+
373+
295374
def test_get_filepaths_from_ref(tmp_path):
296375
# GIVEN a repository
297376
repo = Repository.create(tmp_path)

0 commit comments

Comments
 (0)