Skip to content

Commit ef0d64e

Browse files
authored
Merge pull request #1133 from GitGuardian/salomevoltz/scrt-5971-ggshield-runs-into-oserror-errno-22-invalid-argument-when
fix(secret): skip non-seekable files during scanning
2 parents e5aa4a9 + 5681156 commit ef0d64e

File tree

5 files changed

+83
-5
lines changed

5 files changed

+83
-5
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Removed
10+
11+
- A bullet item for the Removed category.
12+
13+
-->
14+
<!--
15+
### Added
16+
17+
- A bullet item for the Added category.
18+
19+
-->
20+
<!--
21+
### Changed
22+
23+
- A bullet item for the Changed category.
24+
25+
-->
26+
<!--
27+
### Deprecated
28+
29+
- A bullet item for the Deprecated category.
30+
31+
-->
32+
33+
### Fixed
34+
35+
- Skip non-seekable files (such as `/proc/self/mounts`) during secret scanning instead of crashing with `OSError: [Errno 22] Invalid argument`.
36+
37+
<!--
38+
### Security
39+
40+
- A bullet item for the Security category.
41+
42+
-->

ggshield/core/scan/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
from .file import File, create_files_from_paths
33
from .scan_context import ScanContext
44
from .scan_mode import ScanMode
5-
from .scannable import DecodeError, Scannable, StringScannable
5+
from .scannable import DecodeError, NonSeekableFileError, Scannable, StringScannable
66

77

88
__all__ = [
99
"create_files_from_paths",
1010
"Commit",
1111
"DecodeError",
1212
"File",
13+
"NonSeekableFileError",
1314
"ScanContext",
1415
"ScanMode",
1516
"Scannable",

ggshield/core/scan/scannable.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ class DecodeError(Exception):
3131
pass
3232

3333

34+
class NonSeekableFileError(Exception):
35+
"""Raised when seeking in a file fails, even if the file reports itself as seekable."""
36+
37+
pass
38+
39+
3440
class Scannable(ABC):
3541
"""Base class for content that can be scanned by GGShield"""
3642

@@ -143,8 +149,12 @@ def _is_file_longer_than(
143149
Raises DecodeError if the file cannot be decoded.
144150
"""
145151
# Get the byte size
146-
assert fp.seekable()
147-
byte_size = fp.seek(0, SEEK_END)
152+
# Note: IOBase.seekable() returns True on some non-seekable files like /proc/self/mounts
153+
try:
154+
byte_size = fp.seek(0, SEEK_END)
155+
fp.seek(0, SEEK_SET)
156+
except OSError as exc:
157+
raise NonSeekableFileError() from exc
148158

149159
if byte_size > max_utf8_encoded_size * UTF8_TO_WORSE_OTHER_ENCODING_RATIO:
150160
# Even if the file used the worst encoding (UTF-32), encoding the content of
@@ -153,7 +163,6 @@ def _is_file_longer_than(
153163
return True, None, None
154164

155165
# Determine the encoding
156-
fp.seek(0, SEEK_SET)
157166
charset_matches = charset_normalizer.from_fp(fp)
158167
charset_match = charset_matches.best()
159168
if charset_match is None:

ggshield/verticals/secret/secret_scanner.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from ggshield.core.constants import MAX_WORKERS
1717
from ggshield.core.errors import handle_api_error
1818
from ggshield.core.scan import DecodeError, ScanContext, Scannable
19+
from ggshield.core.scan.scannable import NonSeekableFileError
1920
from ggshield.core.scanner_ui.scanner_ui import ScannerUI
2021
from ggshield.core.text_utils import pluralize
2122

@@ -157,6 +158,9 @@ def _start_scans(
157158
except DecodeError:
158159
scanner_ui.on_skipped(scannable, "can't detect encoding")
159160
continue
161+
except NonSeekableFileError:
162+
scanner_ui.on_skipped(scannable, "file cannot be seeked")
163+
continue
160164

161165
if content:
162166
if (

tests/unit/core/scan/test_scannable.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from pathlib import Path
2+
from unittest.mock import patch
23

34
import pytest
45

5-
from ggshield.core.scan import StringScannable
6+
from ggshield.core.scan import File, StringScannable
7+
from ggshield.core.scan.scannable import NonSeekableFileError
68

79

810
def test_string_scannable_path():
@@ -32,3 +34,23 @@ def test_string_scannable_is_longer_than(content, is_longer):
3234
"""
3335
scannable = StringScannable(content=content, url="u")
3436
assert scannable.is_longer_than(50) == is_longer
37+
38+
39+
@patch("pathlib.Path.open")
40+
def test_file_non_seekable(mock_open, tmp_path):
41+
"""
42+
GIVEN a File instance
43+
AND the file reports as seekable but seeking operations fail
44+
WHEN is_longer_than() is called on it
45+
THEN it raises NonSeekableFileError
46+
"""
47+
mock_file = mock_open.return_value.__enter__.return_value
48+
mock_file.seekable.return_value = True
49+
mock_file.seek.side_effect = OSError(22, "Invalid argument")
50+
51+
test_file = tmp_path / "test.txt"
52+
test_file.write_text("test content")
53+
file_obj = File(test_file)
54+
55+
with pytest.raises(NonSeekableFileError):
56+
file_obj.is_longer_than(1000)

0 commit comments

Comments
 (0)