Skip to content

Commit 92cc9f1

Browse files
committed
fix(secret): skip non-seekable files during scanning
1 parent f92b323 commit 92cc9f1

File tree

6 files changed

+98
-12
lines changed

6 files changed

+98
-12
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Removed
10+
11+
- A bullet item for the Removed category.
12+
13+
-->
14+
<!--
15+
### Added
16+
17+
- A bullet item for the Added category.
18+
19+
-->
20+
<!--
21+
### Changed
22+
23+
- A bullet item for the Changed category.
24+
25+
-->
26+
<!--
27+
### Deprecated
28+
29+
- A bullet item for the Deprecated category.
30+
31+
-->
32+
33+
### Fixed
34+
35+
- Skip non-seekable files during secret scanning instead of crashing.
36+
37+
<!--
38+
### Security
39+
40+
- A bullet item for the Security category.
41+
42+
-->

ggshield/core/scan/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
from .file import File, create_files_from_paths
33
from .scan_context import ScanContext
44
from .scan_mode import ScanMode
5-
from .scannable import DecodeError, Scannable, StringScannable
5+
from .scannable import DecodeError, NonSeekableFileError, Scannable, StringScannable
66

77

88
__all__ = [
99
"create_files_from_paths",
1010
"Commit",
1111
"DecodeError",
1212
"File",
13+
"NonSeekableFileError",
1314
"ScanContext",
1415
"ScanMode",
1516
"Scannable",

ggshield/core/scan/file.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from ggshield.utils.files import ListFilesMode, is_path_binary, list_files, url_for_path
55

6-
from .scannable import Scannable
6+
from .scannable import NonSeekableFileError, Scannable
77

88

99
class File(Scannable):
@@ -30,13 +30,16 @@ def is_longer_than(self, max_utf8_encoded_size: int) -> bool:
3030
# We already have the encoded size, easy
3131
return self._utf8_encoded_size > max_utf8_encoded_size
3232

33-
with self.path.open("rb") as fp:
34-
(
35-
result,
36-
self._content,
37-
self._utf8_encoded_size,
38-
) = Scannable._is_file_longer_than(fp, max_utf8_encoded_size)
39-
return result
33+
try:
34+
with self.path.open("rb") as fp:
35+
(
36+
result,
37+
self._content,
38+
self._utf8_encoded_size,
39+
) = Scannable._is_file_longer_than(fp, max_utf8_encoded_size)
40+
return result
41+
except NonSeekableFileError:
42+
raise
4043

4144
def _read_content(self) -> None:
4245
if self._content is None:

ggshield/core/scan/scannable.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ class DecodeError(Exception):
3131
pass
3232

3333

34+
class NonSeekableFileError(Exception):
35+
"""Raised when seeking in a file fails with OSError (the file is not seekable)."""
36+
37+
pass
38+
39+
3440
class Scannable(ABC):
3541
"""Base class for content that can be scanned by GGShield"""
3642

@@ -144,7 +150,10 @@ def _is_file_longer_than(
144150
"""
145151
# Get the byte size
146152
assert fp.seekable()
147-
byte_size = fp.seek(0, SEEK_END)
153+
try:
154+
byte_size = fp.seek(0, SEEK_END)
155+
except OSError:
156+
raise NonSeekableFileError("File does not support seeking operations")
148157

149158
if byte_size > max_utf8_encoded_size * UTF8_TO_WORSE_OTHER_ENCODING_RATIO:
150159
# Even if the file used the worst encoding (UTF-32), encoding the content of
@@ -153,7 +162,10 @@ def _is_file_longer_than(
153162
return True, None, None
154163

155164
# Determine the encoding
156-
fp.seek(0, SEEK_SET)
165+
try:
166+
fp.seek(0, SEEK_SET)
167+
except OSError:
168+
raise NonSeekableFileError("File does not support seeking operations")
157169
charset_matches = charset_normalizer.from_fp(fp)
158170
charset_match = charset_matches.best()
159171
if charset_match is None:

ggshield/verticals/secret/secret_scanner.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from ggshield.core.constants import MAX_WORKERS
1717
from ggshield.core.errors import handle_api_error
1818
from ggshield.core.scan import DecodeError, ScanContext, Scannable
19+
from ggshield.core.scan.scannable import NonSeekableFileError
1920
from ggshield.core.scanner_ui.scanner_ui import ScannerUI
2021
from ggshield.core.text_utils import pluralize
2122

@@ -157,6 +158,9 @@ def _start_scans(
157158
except DecodeError:
158159
scanner_ui.on_skipped(scannable, "can't detect encoding")
159160
continue
161+
except NonSeekableFileError:
162+
scanner_ui.on_skipped(scannable, "file cannot be seeked")
163+
continue
160164

161165
if content:
162166
if (

tests/unit/core/scan/test_scannable.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from pathlib import Path
2+
from unittest.mock import patch
23

34
import pytest
45

5-
from ggshield.core.scan import StringScannable
6+
from ggshield.core.scan import File, StringScannable
7+
from ggshield.core.scan.scannable import NonSeekableFileError
68

79

810
def test_string_scannable_path():
@@ -32,3 +34,25 @@ def test_string_scannable_is_longer_than(content, is_longer):
3234
"""
3335
scannable = StringScannable(content=content, url="u")
3436
assert scannable.is_longer_than(50) == is_longer
37+
38+
39+
@patch("pathlib.Path.open")
40+
def test_file_non_seekable(mock_open, tmp_path):
41+
"""
42+
GIVEN a File instance
43+
AND the file reports as seekable but seeking operations fail
44+
WHEN is_longer_than() is called on it
45+
THEN it raises NonSeekableFileError
46+
"""
47+
mock_file = mock_open.return_value.__enter__.return_value
48+
mock_file.seekable.return_value = True  # lets the `assert fp.seekable()` guard pass so the failing seek() is reached
49+
mock_file.seek.side_effect = OSError(22, "Invalid argument")
50+
51+
test_file = tmp_path / "test.txt"
52+
test_file.write_text("test content")
53+
file_obj = File(test_file)
54+
55+
with pytest.raises(
56+
NonSeekableFileError, match="File does not support seeking operations"
57+
):
58+
file_obj.is_longer_than(1000)

0 commit comments

Comments
 (0)