From 2490eb2acffc619b53cdf1418a1e356d5e7e7c05 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 13:05:34 +0800 Subject: [PATCH 01/10] Check symlink target in tar extraction fallback for Pythons without data_filter --- src/pip/_internal/utils/unpacking.py | 19 +++++ tests/unit/test_utils_unpacking.py | 100 +++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 0ad3129acf4..b33d0ee7842 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -255,6 +255,17 @@ def _untar_without_filter( leading: bool, ) -> None: """Fallback for Python without tarfile.data_filter""" + + def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: + linkname = "/".join( + filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)) + ) + + try: + tar.getmember(linkname) + except KeyError: + raise KeyError(linkname) + for member in tar.getmembers(): fn = member.name if leading: @@ -269,6 +280,14 @@ def _untar_without_filter( if member.isdir(): ensure_dir(path) elif member.issym(): + try: + _check_link_target(tar, member) + except KeyError as exc: + message = ( + "The tar file ({}) has a file ({}) trying to install " + "outside target directory ({})" + ) + raise InstallationError(message.format(filename, member.name, exc)) try: tar._extract_member(member, path) except Exception as exc: diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 6f373b1acad..1332f7a63c7 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -10,6 +10,7 @@ from pathlib import Path import pytest +from _pytest.monkeypatch import MonkeyPatch from pip._internal.exceptions import InstallationError from pip._internal.utils.unpacking import is_within_directory, untar_file, unzip_file @@ -238,6 +239,105 @@ def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None: with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f: assert f.read() == content + def test_unpack_normal_tar_links_no_data_filter( + self, monkeypatch: MonkeyPatch + ) -> None: + """ + Test unpacking a normal tar with file containing soft links, but no data_filter + """ + if hasattr(tarfile, "data_filter"): + monkeypatch.delattr("tarfile.data_filter") + + tar_filename = "test_tar_links_no_data_filter.tar" + tar_filepath = os.path.join(self.tempdir, tar_filename) + + extract_path = os.path.join(self.tempdir, "extract_path") + + with tarfile.open(tar_filepath, "w") as tar: + file_data = io.BytesIO(b"normal\n") + normal_file_tarinfo = tarfile.TarInfo(name="normal_file") + normal_file_tarinfo.size = len(file_data.getbuffer()) + tar.addfile(normal_file_tarinfo, fileobj=file_data) + + info = tarfile.TarInfo("normal_symlink") + info.type = tarfile.SYMTYPE + info.linkpath = "normal_file" + tar.addfile(info) + + untar_file(tar_filepath, extract_path) + + assert os.path.islink(os.path.join(extract_path, "normal_symlink")) + + link_path = os.readlink(os.path.join(extract_path, "normal_symlink")) + assert link_path == "normal_file" + + with open(os.path.join(extract_path, "normal_symlink"), "rb") as f: + assert f.read() == b"normal\n" + + def test_unpack_evil_tar_link1_no_data_filter( + self, monkeypatch: MonkeyPatch + ) -> None: + """ + Test unpacking a evil tar with file containing soft links, but no data_filter + """ + if hasattr(tarfile, "data_filter"): + monkeypatch.delattr("tarfile.data_filter") + + tar_filename = "test_tar_links_no_data_filter.tar" + tar_filepath = os.path.join(self.tempdir, tar_filename) + + import_filename = "import_file" + import_filepath = os.path.join(self.tempdir, import_filename) + open(import_filepath, "w").close() + + extract_path = os.path.join(self.tempdir, "extract_path") + + with tarfile.open(tar_filepath, "w") as tar: + info = tarfile.TarInfo("evil_symlink") + info.type = tarfile.SYMTYPE + info.linkpath = import_filepath + tar.addfile(info) + + with pytest.raises(InstallationError) as e: + untar_file(tar_filepath, extract_path) + + assert "trying to install outside target directory" in str(e.value) + assert "import_file" in str(e.value) + + assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) + + def test_unpack_evil_tar_link2_no_data_filter( + self, monkeypatch: MonkeyPatch + ) -> None: + """ + Test unpacking a evil tar with file containing soft links, but no data_filter + """ + if hasattr(tarfile, "data_filter"): + monkeypatch.delattr("tarfile.data_filter") + + tar_filename = "test_tar_links_no_data_filter.tar" + tar_filepath = os.path.join(self.tempdir, tar_filename) + + import_filename = "import_file" + import_filepath = os.path.join(self.tempdir, import_filename) + open(import_filepath, "w").close() + + extract_path = os.path.join(self.tempdir, "extract_path") + + with tarfile.open(tar_filepath, "w") as tar: + info = tarfile.TarInfo("evil_symlink") + info.type = tarfile.SYMTYPE + info.linkpath = ".." + os.sep + import_filename + tar.addfile(info) + + with pytest.raises(InstallationError) as e: + untar_file(tar_filepath, extract_path) + + assert "trying to install outside target directory" in str(e.value) + assert ".." + os.sep + import_filename in str(e.value) + + assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) + def test_unpack_tar_unicode(tmpdir: Path) -> None: test_tar = tmpdir / "test.tar" From 3e158244d3b5e331fd6f81736de71d73f2335490 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 13:35:51 +0800 Subject: [PATCH 02/10] Add NEWS entry --- news/13550.bugfix.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 news/13550.bugfix.rst diff --git a/news/13550.bugfix.rst b/news/13550.bugfix.rst new file mode 100644 index 00000000000..64fea7f39c2 --- /dev/null +++ b/news/13550.bugfix.rst @@ -0,0 +1,2 @@ +Add the _check_link_targetfunction to validate the file pointed to by a symlink. If path traversal +is detected, it should raise an InstallationErrorexception, similar to is_within_directory. \ No newline at end of file From bbe7cc76a8a2ada11c518cb1c52bcd9d6a35a41d Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 13:46:25 +0800 Subject: [PATCH 03/10] Fix the Windows path issue in test cases. --- tests/unit/test_utils_unpacking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 1332f7a63c7..6616ad8a19a 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -334,7 +334,8 @@ def test_unpack_evil_tar_link2_no_data_filter( untar_file(tar_filepath, extract_path) assert "trying to install outside target directory" in str(e.value) - assert ".." + os.sep + import_filename in str(e.value) + assert ".." in str(e.value) + assert import_filename in str(e.value) assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) From 7f2a97991e449dbe99d00cefe2a8f7edcf3110a3 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 16:28:53 +0800 Subject: [PATCH 04/10] normpath linkname --- news/13550.bugfix.rst | 4 +-- src/pip/_internal/utils/unpacking.py | 2 ++ tests/unit/test_utils_unpacking.py | 37 +++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/news/13550.bugfix.rst b/news/13550.bugfix.rst index 64fea7f39c2..cfc6b966459 100644 --- a/news/13550.bugfix.rst +++ b/news/13550.bugfix.rst @@ -1,2 +1,2 @@ -Add the _check_link_targetfunction to validate the file pointed to by a symlink. If path traversal -is detected, it should raise an InstallationErrorexception, similar to is_within_directory. \ No newline at end of file +Add the _check_link_targetfunction to validate the file pointed to by a symlink. If path traversal +is detected, it should raise an InstallationErrorexception, similar to is_within_directory. diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index b33d0ee7842..57f5c2f3e4c 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -261,6 +261,8 @@ def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)) ) + linkname = os.path.normpath(linkname) + try: tar.getmember(linkname) except KeyError: diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 6616ad8a19a..2b02dcb3b1d 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -239,7 +239,7 @@ def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None: with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f: assert f.read() == content - def test_unpack_normal_tar_links_no_data_filter( + def test_unpack_normal_tar_link1_no_data_filter( self, monkeypatch: MonkeyPatch ) -> None: """ @@ -274,6 +274,41 @@ def test_unpack_normal_tar_links_no_data_filter( with open(os.path.join(extract_path, "normal_symlink"), "rb") as f: assert f.read() == b"normal\n" + def test_unpack_normal_tar_link2_no_data_filter( + self, monkeypatch: MonkeyPatch + ) -> None: + """ + Test unpacking a normal tar with file containing soft links, but no data_filter + """ + if hasattr(tarfile, "data_filter"): + monkeypatch.delattr("tarfile.data_filter") + + tar_filename = "test_tar_links_no_data_filter.tar" + tar_filepath = os.path.join(self.tempdir, tar_filename) + + extract_path = os.path.join(self.tempdir, "extract_path") + + with tarfile.open(tar_filepath, "w") as tar: + file_data = io.BytesIO(b"normal\n") + normal_file_tarinfo = tarfile.TarInfo(name="normal_file") + normal_file_tarinfo.size = len(file_data.getbuffer()) + tar.addfile(normal_file_tarinfo, fileobj=file_data) + + info = tarfile.TarInfo("sub/normal_symlink") + info.type = tarfile.SYMTYPE + info.linkpath = ".." + os.path.sep + "normal_file" + tar.addfile(info) + + untar_file(tar_filepath, extract_path) + + assert os.path.islink(os.path.join(extract_path, "sub", "normal_symlink")) + + link_path = os.readlink(os.path.join(extract_path, "sub", "normal_symlink")) + assert link_path == ".." + os.path.sep + "normal_file" + + with open(os.path.join(extract_path, "sub", "normal_symlink"), "rb") as f: + assert f.read() == b"normal\n" + def test_unpack_evil_tar_link1_no_data_filter( self, monkeypatch: MonkeyPatch ) -> None: From 3390548a63f92af083c8ab008c9556bb78fad378 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 17:54:38 +0800 Subject: [PATCH 05/10] Handle cases where the name of a member in a tar archive may use different separators. --- src/pip/_internal/utils/unpacking.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 57f5c2f3e4c..45dc6f801de 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -266,6 +266,15 @@ def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: try: tar.getmember(linkname) except KeyError: + if "\\" in linkname or "/" in linkname: + if "\\" in linkname: + linkname = linkname.replace("\\", "/") + else: + linkname = linkname.replace("/", "\\") + try: + tar.getmember(linkname) + except KeyError: + raise KeyError(linkname) raise KeyError(linkname) for member in tar.getmembers(): From eaee1818a8e6be08aad1bcfa776b3261b5343e70 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 20 Aug 2025 18:22:18 +0800 Subject: [PATCH 06/10] Fix the bug in the process logic. --- src/pip/_internal/utils/unpacking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 45dc6f801de..44f2d7f5401 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -275,7 +275,8 @@ def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: tar.getmember(linkname) except KeyError: raise KeyError(linkname) - raise KeyError(linkname) + else: + raise KeyError(linkname) for member in tar.getmembers(): fn = member.name From dcd1ff5b159de03dcce56530b2dfba7d6f6a6963 Mon Sep 17 00:00:00 2001 From: dkjsone <221672629+dkjsone@users.noreply.github.com> Date: Wed, 27 Aug 2025 22:06:51 +0800 Subject: [PATCH 07/10] =?UTF-8?q?Replace=20=5Fcheck=5Flink=5Ftarget=20with?= =?UTF-8?q?=20is=5Fsymlink=5Ftarget=5Fin=5Ftar=E2=80=8B=E2=80=8B;=20?= =?UTF-8?q?=E2=80=8B=E2=80=8BAdd=20deprecation=20note=20for=20=5Funtar=5Fw?= =?UTF-8?q?ithout=5Ffilter=20for=20future=20removal=E2=80=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news/13550.bugfix.rst | 4 +-- src/pip/_internal/utils/unpacking.py | 49 +++++++++++++--------------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/news/13550.bugfix.rst b/news/13550.bugfix.rst index cfc6b966459..667d4bd5776 100644 --- a/news/13550.bugfix.rst +++ b/news/13550.bugfix.rst @@ -1,2 +1,2 @@ -Add the _check_link_targetfunction to validate the file pointed to by a symlink. If path traversal -is detected, it should raise an InstallationErrorexception, similar to is_within_directory. +Pip will now raise an installation error for a source distribution when it includes a symlink that +points outside the source distribution archive. diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 44f2d7f5401..47e28a6bc1e 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -248,6 +248,21 @@ def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo: tar.close() +def is_symlink_target_in_tar(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> bool: + """Check if the file pointed to by the symbolic link is in the tar archive""" + linkname = os.path.join(os.path.dirname(tarinfo.name), tarinfo.linkname) + + linkname = os.path.normpath(linkname) + if "\\" in linkname: + linkname = linkname.replace("\\", "/") + + try: + tar.getmember(linkname) + return True + except KeyError: + return False + + def _untar_without_filter( filename: str, location: str, @@ -255,29 +270,9 @@ def _untar_without_filter( leading: bool, ) -> None: """Fallback for Python without tarfile.data_filter""" - - def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: - linkname = "/".join( - filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)) - ) - - linkname = os.path.normpath(linkname) - - try: - tar.getmember(linkname) - except KeyError: - if "\\" in linkname or "/" in linkname: - if "\\" in linkname: - linkname = linkname.replace("\\", "/") - else: - linkname = linkname.replace("/", "\\") - try: - tar.getmember(linkname) - except KeyError: - raise KeyError(linkname) - else: - raise KeyError(linkname) - + # NOTE: This function can be removed once pip requires CPython ≥ 3.12.​ + # PEP 706 added tarfile.data_filter, made tarfile extraction operations more secure. + # This feature is fully supported from CPython 3.12 onward. for member in tar.getmembers(): fn = member.name if leading: @@ -292,14 +287,14 @@ def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None: if member.isdir(): ensure_dir(path) elif member.issym(): - try: - _check_link_target(tar, member) - except KeyError as exc: + if not is_symlink_target_in_tar(tar, member): message = ( "The tar file ({}) has a file ({}) trying to install " "outside target directory ({})" ) - raise InstallationError(message.format(filename, member.name, exc)) + raise InstallationError( + message.format(filename, member.name, member.linkname) + ) try: tar._extract_member(member, path) except Exception as exc: From 399f4ea139345a21a2e93b3b4a90b437ace91f94 Mon Sep 17 00:00:00 2001 From: dkjsone <221672629+dkjsone@users.noreply.github.com> Date: Wed, 3 Sep 2025 23:24:51 +0800 Subject: [PATCH 08/10] Update test cases with clearer exception assertions; Update news entry --- news/13550.bugfix.rst | 4 ++-- tests/unit/test_utils_unpacking.py | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/news/13550.bugfix.rst b/news/13550.bugfix.rst index 667d4bd5776..6804656d758 100644 --- a/news/13550.bugfix.rst +++ b/news/13550.bugfix.rst @@ -1,2 +1,2 @@ -Pip will now raise an installation error for a source distribution when it includes a symlink that -points outside the source distribution archive. +For Python versions that do not support PEP 706, pip will now raise an installation error for a +source distribution when it includes a symlink that points outside the source distribution archive. \ No newline at end of file diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 2b02dcb3b1d..8f0c25c13fd 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -336,8 +336,8 @@ def test_unpack_evil_tar_link1_no_data_filter( with pytest.raises(InstallationError) as e: untar_file(tar_filepath, extract_path) - assert "trying to install outside target directory" in str(e.value) - assert "import_file" in str(e.value) + msg = "The tar file ({}) has a file ({}) trying to install outside target directory ({})" + assert msg.format(tar_filepath, "evil_symlink", import_filepath) in str(e.value) assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) @@ -359,18 +359,19 @@ def test_unpack_evil_tar_link2_no_data_filter( extract_path = os.path.join(self.tempdir, "extract_path") + link_path = ".." + os.sep + import_filename + with tarfile.open(tar_filepath, "w") as tar: info = tarfile.TarInfo("evil_symlink") info.type = tarfile.SYMTYPE - info.linkpath = ".." + os.sep + import_filename + info.linkpath = link_path tar.addfile(info) with pytest.raises(InstallationError) as e: untar_file(tar_filepath, extract_path) - assert "trying to install outside target directory" in str(e.value) - assert ".." in str(e.value) - assert import_filename in str(e.value) + msg = "The tar file ({}) has a file ({}) trying to install outside target directory ({})" + assert msg.format(tar_filepath, "evil_symlink", link_path) in str(e.value) assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) From fb0a8e6331df1de5343db2e25ddae48a81e1b072 Mon Sep 17 00:00:00 2001 From: user Date: Thu, 4 Sep 2025 11:16:23 +0800 Subject: [PATCH 09/10] Format adjustment, no content changes --- news/13550.bugfix.rst | 4 ++-- tests/unit/test_utils_unpacking.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/news/13550.bugfix.rst b/news/13550.bugfix.rst index 6804656d758..e7de219a568 100644 --- a/news/13550.bugfix.rst +++ b/news/13550.bugfix.rst @@ -1,2 +1,2 @@ -For Python versions that do not support PEP 706, pip will now raise an installation error for a -source distribution when it includes a symlink that points outside the source distribution archive. \ No newline at end of file +For Python versions that do not support PEP 706, pip will now raise an installation error for a +source distribution when it includes a symlink that points outside the source distribution archive. diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 8f0c25c13fd..003cce1488e 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -336,7 +336,10 @@ def test_unpack_evil_tar_link1_no_data_filter( with pytest.raises(InstallationError) as e: untar_file(tar_filepath, extract_path) - msg = "The tar file ({}) has a file ({}) trying to install outside target directory ({})" + msg = ( + "The tar file ({}) has a file ({}) trying to install outside " + "target directory ({})" + ) assert msg.format(tar_filepath, "evil_symlink", import_filepath) in str(e.value) assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) @@ -370,7 +373,10 @@ def test_unpack_evil_tar_link2_no_data_filter( with pytest.raises(InstallationError) as e: untar_file(tar_filepath, extract_path) - msg = "The tar file ({}) has a file ({}) trying to install outside target directory ({})" + msg = ( + "The tar file ({}) has a file ({}) trying to install outside " + "target directory ({})" + ) assert msg.format(tar_filepath, "evil_symlink", link_path) in str(e.value) assert not os.path.exists(os.path.join(extract_path, "evil_symlink")) From b154d0600f1712c0d5127cf59c9abf94c87d04b3 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 5 Sep 2025 10:22:10 +0800 Subject: [PATCH 10/10] =?UTF-8?q?Remove=20redundant=20check=20before=20bac?= =?UTF-8?q?kslash=20replacement=E2=80=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pip/_internal/utils/unpacking.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 47e28a6bc1e..bc950ac93f3 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -253,8 +253,7 @@ def is_symlink_target_in_tar(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> linkname = os.path.join(os.path.dirname(tarinfo.name), tarinfo.linkname) linkname = os.path.normpath(linkname) - if "\\" in linkname: - linkname = linkname.replace("\\", "/") + linkname = linkname.replace("\\", "/") try: tar.getmember(linkname)