Skip to content

Commit 0bffb9e

Browse files
authored
Remove check of matching claimed and detected MIME type (#60)
This check seems unnecessary, since we already check the allowed MIME types against the detected type rather than the claimed one. It also leads to a large number of false positives, because many clients have trouble detecting the type properly.
1 parent 443eed4 commit 0bffb9e

File tree

2 files changed

+5
-47
lines changed

2 files changed

+5
-47
lines changed

src/matrix_content_scanner/scanner/scanner.py

Lines changed: 5 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -332,13 +332,8 @@ async def _scan_media(
332332
# If the file is encrypted, we need to decrypt it before we can scan it.
333333
media_content = self._decrypt_file(media_content, metadata)
334334

335-
# Check the file's MIME type to see if it's allowed and, if the file is not
336-
# encrypted, if it matches the Content-Type header the homeserver sent us.
337-
self._check_mimetype(
338-
media_content=media_content,
339-
claimed_mimetype=media.content_type,
340-
encrypted=metadata is not None,
341-
)
335+
# Check the file's MIME type to see if it's allowed.
336+
self._check_mimetype(media_content)
342337

343338
# Write the file to disk.
344339
file_path = self._write_file_to_disk(media_path, media_content)
@@ -498,42 +493,18 @@ async def _run_scan(self, file_name: str) -> int:
498493

499494
return retcode
500495

501-
def _check_mimetype(
502-
self,
503-
media_content: bytes,
504-
claimed_mimetype: str,
505-
encrypted: bool,
506-
) -> None:
507-
"""Detects the MIME type of the provided bytes, and checks that:
508-
* it matches with the Content-Type header that was received when downloading this
509-
file (if the media isn't encrypted, since otherwise the Content-Type header
510-
is always 'application/octet-stream')
511-
* files with this MIME type are allowed (if an allow list is provided in the
512-
configuration)
496+
def _check_mimetype(self,media_content: bytes) -> None:
497+
"""Detects the MIME type of the provided bytes, and checks that this type is allowed
498+
(if an allow list is provided in the configuration)
513499
Args:
514500
media_content: The file's content. If the file is encrypted, this is its
515501
decrypted content.
516-
claimed_mimetype: The value of the Content-Type header received when
517-
downloading the file.
518-
encrypted: Whether the file was encrypted (in which case we don't want to
519-
check that its MIME type matches with the Content-Type header).
520502
Raises:
521503
FileDirtyError if one of the checks fail.
522504
"""
523505
detected_mimetype = magic.mimetype(media_content)
524506
logger.debug("Detected MIME type for file is %s", detected_mimetype)
525507

526-
# Check if the MIME type is matching the one that's expected, but only if the file
527-
# is not encrypted (because otherwise we'll always have 'application/octet-stream'
528-
# in the Content-Type header regardless of the actual MIME type of the file).
529-
if encrypted is False and detected_mimetype != claimed_mimetype:
530-
logger.error(
531-
"Mismatching MIME type (%s) and Content-Type header (%s)",
532-
detected_mimetype,
533-
claimed_mimetype,
534-
)
535-
raise FileDirtyError("File type not supported")
536-
537508
# If there's an allow list for MIME types, check that the MIME type that's been
538509
# detected for this file is in it.
539510
if (

tests/scanner/test_scanner.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -247,19 +247,6 @@ async def test_mimetype_encrypted(self) -> None:
247247
with self.assertRaises(FileDirtyError):
248248
await self.scanner.scan_file(MEDIA_PATH, ENCRYPTED_FILE_METADATA)
249249

250-
async def test_mimetype_content_type_mismatch(self) -> None:
251-
"""Tests that a scan fails if the detected MIME type does not match the value of
252-
the Content-Type header sent by the homeserver.
253-
"""
254-
# Set up the file description to make it look as if the homeserver tried to tell
255-
# us the file is a JPEG (even though it's actually a PNG).
256-
self.downloader_res.content_type = "image/jpeg"
257-
258-
# Check that the scan fails since the file's detected MIME type doesn't match the
259-
# value of the Content-Type header.
260-
with self.assertRaises(FileDirtyError):
261-
await self.scanner.scan_file(MEDIA_PATH)
262-
263250
async def test_dont_cache_exit_codes(self) -> None:
264251
"""Tests that if the configuration specifies exit codes to ignore when running
265252
the scanning script, we don't cache them.

0 commit comments

Comments
 (0)