Skip to content
2 changes: 2 additions & 0 deletions news/13550.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add the _check_link_targetfunction to validate the file pointed to by a symlink. If path traversal
is detected, it should raise an InstallationErrorexception, similar to is_within_directory.
31 changes: 31 additions & 0 deletions src/pip/_internal/utils/unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,29 @@ def _untar_without_filter(
leading: bool,
) -> None:
"""Fallback for Python without tarfile.data_filter"""

def _check_link_target(tar: tarfile.TarFile, tarinfo: tarfile.TarInfo) -> None:
linkname = "/".join(
filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))
)

linkname = os.path.normpath(linkname)

try:
tar.getmember(linkname)
except KeyError:
if "\\" in linkname or "/" in linkname:
if "\\" in linkname:
linkname = linkname.replace("\\", "/")
else:
linkname = linkname.replace("/", "\\")
try:
tar.getmember(linkname)
except KeyError:
raise KeyError(linkname)
else:
raise KeyError(linkname)

for member in tar.getmembers():
fn = member.name
if leading:
Expand All @@ -269,6 +292,14 @@ def _untar_without_filter(
if member.isdir():
ensure_dir(path)
elif member.issym():
try:
_check_link_target(tar, member)
except KeyError as exc:
message = (
"The tar file ({}) has a file ({}) trying to install "
"outside target directory ({})"
)
raise InstallationError(message.format(filename, member.name, exc))
try:
tar._extract_member(member, path)
except Exception as exc:
Expand Down
136 changes: 136 additions & 0 deletions tests/unit/test_utils_unpacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pathlib import Path

import pytest
from _pytest.monkeypatch import MonkeyPatch

from pip._internal.exceptions import InstallationError
from pip._internal.utils.unpacking import is_within_directory, untar_file, unzip_file
Expand Down Expand Up @@ -238,6 +239,141 @@ def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None:
with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f:
assert f.read() == content

def test_unpack_normal_tar_link1_no_data_filter(
self, monkeypatch: MonkeyPatch
) -> None:
"""
Test unpacking a normal tar with file containing soft links, but no data_filter
"""
if hasattr(tarfile, "data_filter"):
monkeypatch.delattr("tarfile.data_filter")

tar_filename = "test_tar_links_no_data_filter.tar"
tar_filepath = os.path.join(self.tempdir, tar_filename)

extract_path = os.path.join(self.tempdir, "extract_path")

with tarfile.open(tar_filepath, "w") as tar:
file_data = io.BytesIO(b"normal\n")
normal_file_tarinfo = tarfile.TarInfo(name="normal_file")
normal_file_tarinfo.size = len(file_data.getbuffer())
tar.addfile(normal_file_tarinfo, fileobj=file_data)

info = tarfile.TarInfo("normal_symlink")
info.type = tarfile.SYMTYPE
info.linkpath = "normal_file"
tar.addfile(info)

untar_file(tar_filepath, extract_path)

assert os.path.islink(os.path.join(extract_path, "normal_symlink"))

link_path = os.readlink(os.path.join(extract_path, "normal_symlink"))
assert link_path == "normal_file"

with open(os.path.join(extract_path, "normal_symlink"), "rb") as f:
assert f.read() == b"normal\n"

def test_unpack_normal_tar_link2_no_data_filter(
self, monkeypatch: MonkeyPatch
) -> None:
"""
Test unpacking a normal tar with file containing soft links, but no data_filter
"""
if hasattr(tarfile, "data_filter"):
monkeypatch.delattr("tarfile.data_filter")

tar_filename = "test_tar_links_no_data_filter.tar"
tar_filepath = os.path.join(self.tempdir, tar_filename)

extract_path = os.path.join(self.tempdir, "extract_path")

with tarfile.open(tar_filepath, "w") as tar:
file_data = io.BytesIO(b"normal\n")
normal_file_tarinfo = tarfile.TarInfo(name="normal_file")
normal_file_tarinfo.size = len(file_data.getbuffer())
tar.addfile(normal_file_tarinfo, fileobj=file_data)

info = tarfile.TarInfo("sub/normal_symlink")
info.type = tarfile.SYMTYPE
info.linkpath = ".." + os.path.sep + "normal_file"
tar.addfile(info)

untar_file(tar_filepath, extract_path)

assert os.path.islink(os.path.join(extract_path, "sub", "normal_symlink"))

link_path = os.readlink(os.path.join(extract_path, "sub", "normal_symlink"))
assert link_path == ".." + os.path.sep + "normal_file"

with open(os.path.join(extract_path, "sub", "normal_symlink"), "rb") as f:
assert f.read() == b"normal\n"

def test_unpack_evil_tar_link1_no_data_filter(
self, monkeypatch: MonkeyPatch
) -> None:
"""
Test unpacking a evil tar with file containing soft links, but no data_filter
"""
if hasattr(tarfile, "data_filter"):
monkeypatch.delattr("tarfile.data_filter")

tar_filename = "test_tar_links_no_data_filter.tar"
tar_filepath = os.path.join(self.tempdir, tar_filename)

import_filename = "import_file"
import_filepath = os.path.join(self.tempdir, import_filename)
open(import_filepath, "w").close()

extract_path = os.path.join(self.tempdir, "extract_path")

with tarfile.open(tar_filepath, "w") as tar:
info = tarfile.TarInfo("evil_symlink")
info.type = tarfile.SYMTYPE
info.linkpath = import_filepath
tar.addfile(info)

with pytest.raises(InstallationError) as e:
untar_file(tar_filepath, extract_path)

assert "trying to install outside target directory" in str(e.value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we know the full paths, can we not template to do an exact match of the complete message in both negative cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The modifications have been adjusted based on your suggestions.

assert "import_file" in str(e.value)

assert not os.path.exists(os.path.join(extract_path, "evil_symlink"))

def test_unpack_evil_tar_link2_no_data_filter(
self, monkeypatch: MonkeyPatch
) -> None:
"""
Test unpacking a evil tar with file containing soft links, but no data_filter
"""
if hasattr(tarfile, "data_filter"):
monkeypatch.delattr("tarfile.data_filter")

tar_filename = "test_tar_links_no_data_filter.tar"
tar_filepath = os.path.join(self.tempdir, tar_filename)

import_filename = "import_file"
import_filepath = os.path.join(self.tempdir, import_filename)
open(import_filepath, "w").close()

extract_path = os.path.join(self.tempdir, "extract_path")

with tarfile.open(tar_filepath, "w") as tar:
info = tarfile.TarInfo("evil_symlink")
info.type = tarfile.SYMTYPE
info.linkpath = ".." + os.sep + import_filename
tar.addfile(info)

with pytest.raises(InstallationError) as e:
untar_file(tar_filepath, extract_path)

assert "trying to install outside target directory" in str(e.value)
assert ".." in str(e.value)
assert import_filename in str(e.value)

assert not os.path.exists(os.path.join(extract_path, "evil_symlink"))


def test_unpack_tar_unicode(tmpdir: Path) -> None:
test_tar = tmpdir / "test.tar"
Expand Down
Loading