3 changes: 3 additions & 0 deletions doc/manual/src/release-notes/rl-next.md
@@ -24,3 +24,6 @@

Selecting derivation outputs using the attribute selection syntax
(e.g. `nixpkgs#glibc.dev`) no longer works.

* `builtins.fetchTree` (and flake inputs) can now be used to fetch plain files
over the `http(s)` and `file` protocols in addition to directory tarballs.
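
  For example (a minimal sketch; the URL is a placeholder):

  ```nix
  # Fetch a single file over HTTPS; the result is an opaque file
  # rather than an unpacked source tree.
  builtins.fetchTree {
    type = "file";
    url = "https://example.org/hello.txt";
  }
  ```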
90 changes: 68 additions & 22 deletions src/libfetchers/tarball.cc
@@ -6,6 +6,7 @@
#include "archive.hh"
#include "tarfile.hh"
#include "types.hh"
#include "split.hh"

namespace nix::fetchers {

@@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball(
};
}

struct TarballInputScheme : InputScheme
// An input scheme corresponding to a curl-able resource
struct CurlInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url) override
virtual const std::string inputType() const = 0;
const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};

const bool hasTarballExtension(std::string_view path) const
{
if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {};
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar")
|| hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz")
|| hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|| hasSuffix(path, ".tar.zst");
}

if (!hasSuffix(url.path, ".zip")
&& !hasSuffix(url.path, ".tar")
&& !hasSuffix(url.path, ".tgz")
&& !hasSuffix(url.path, ".tar.gz")
&& !hasSuffix(url.path, ".tar.xz")
&& !hasSuffix(url.path, ".tar.bz2")
&& !hasSuffix(url.path, ".tar.zst"))
return {};
virtual bool isValidURL(const ParsedURL & url) const = 0;

std::optional<Input> inputFromURL(const ParsedURL & url) override
{
if (!isValidURL(url))
return std::nullopt;

Input input;
input.attrs.insert_or_assign("type", "tarball");
input.attrs.insert_or_assign("url", url.to_string());

auto urlWithoutApplicationScheme = url;
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;

input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
@@ -194,29 +205,27 @@ struct TarballInputScheme : InputScheme

std::optional<Input> inputFromAttrs(const Attrs & attrs) override
{
if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};

std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name")
throw Error("unsupported tarball input attribute '%s'", name);
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name);

Input input;
input.attrs = attrs;

//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}

ParsedURL toURL(const Input & input) override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
// NAR hashes are preferred over file hashes since tar/zip files
// don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(SRI, true));
/*
else if (auto hash = maybeGetStrAttr(input.attrs, "hash"))
url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true));
*/
return url;
}

@@ -225,6 +234,42 @@
return true;
}

};

struct FileInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "file"; }

bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: !hasTarballExtension(url.path));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
Review comment (Member): A problem with the use of downloadFile() is that it uses FileIngestionMethod::Flat instead of FileIngestionMethod::Recursive. Currently it's assumed that all flake inputs use recursive+sha256 with a name of "source". This allows inputs to be substituted using the narHash attribute in the lock file (see Input::computeStorePath()). However, the lazy trees branch will probably remove the ability to substitute inputs anyway...

Reply (Member Author): Oh indeed, I didn’t notice the Flat here. Would it be an issue to make downloadFile use Recursive? Afaik none of the other use sites really care about how the result is hashed since it’s only used internally by the fetchers but doesn’t leak outside.

(Alternatively I could just wrap that by adding an extra step that would copy the output to a Recursive CA location, but if that can be avoided it’s even better.)

return {std::move(file.storePath), input};
}
};

struct TarballInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "tarball"; }

bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);

return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: hasTarballExtension(url.path));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
@@ -233,5 +278,6 @@ struct TarballInputScheme
};

static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });

}
18 changes: 18 additions & 0 deletions src/libutil/url.cc
@@ -1,6 +1,7 @@
#include "url.hh"
#include "url-parts.hh"
#include "util.hh"
#include "split.hh"

namespace nix {

@@ -136,4 +137,21 @@ bool ParsedURL::operator ==(const ParsedURL & other) const
&& fragment == other.fragment;
}

/**
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
* into a tuple '(applicationScheme, transportScheme)'
*
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
*/
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
{
auto application = splitPrefixTo(scheme, '+');
auto transport = scheme;
return ParsedUrlScheme {
.application = application,
.transport = transport,
};
}

}
15 changes: 15 additions & 0 deletions src/libutil/url.hh
@@ -27,4 +27,19 @@ std::map<std::string, std::string> decodeQuery(const std::string & query);

ParsedURL parseURL(const std::string & url);

/*
* Although this isn’t really standardized anywhere, a number of tools
* use a scheme of the form 'x+y' in URLs, where y is the “transport layer”
* scheme, and x is the “application layer” scheme.
*
* For example, Git uses `git+https` to designate remotes using the Git
* protocol over HTTPS.
*/
struct ParsedUrlScheme {
std::optional<std::string_view> application;
std::string_view transport;
};

ParsedUrlScheme parseUrlScheme(std::string_view scheme);

}
14 changes: 11 additions & 3 deletions src/nix/flake.md
@@ -181,9 +181,17 @@ Currently the `type` attribute can be one of the following:
* `tarball`: Tarballs. The location of the tarball is specified by the
attribute `url`.

In URL form, the scheme must be `http://`, `https://` or `file://`
and the extension must be `.zip`, `.tar`, `.tgz`, `.tar.gz`,
`.tar.xz`, `.tar.bz2` or `.tar.zst`.
In URL form, the scheme must be `tarball+http://`, `tarball+https://` or `tarball+file://`.
If the extension corresponds to a known archive format (`.zip`, `.tar`,
`.tgz`, `.tar.gz`, `.tar.xz`, `.tar.bz2` or `.tar.zst`), then the `tarball+`
prefix can be dropped.
Review comment on lines +184 to +187 (Member): Isn't that change breaking backwards compatibility?

Reply (Member Author): Nope, on the contrary: currently http(s)/file URLs are required to have such an extension and are treated as tarballs. The new behavior makes it so that what was already accepted still is, with the same semantics, and the rest is treated as file URLs.

Follow-up (Member Author): The problematic bit, though, is that adding a new tarball extension would break backwards compat a bit: with this change, https://foo.com/bar.tar.lzma is fetched as a plain file, and if we wanted to add .tar.lzma as a recognized archive extension, it would become a directory tarball.

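For illustration, a minimal flake exercising both URL forms of the `tarball` fetcher (hypothetical URLs; the behavior follows the rules above):

```nix
{
  # Explicit `tarball+` application scheme: unpacked even without a known
  # archive extension.
  inputs.explicit = {
    url = "tarball+https://example.org/source";
    flake = false;
  };
  # Known archive extension, so the `tarball+` prefix can be dropped.
  inputs.implicit = {
    url = "https://example.org/source.tar.gz";
    flake = false;
  };
  outputs = { ... }: { };
}
```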

* `file`: Plain files or directory tarballs, either over http(s) or from the local
disk.
Review comment (Member): Might as well rename the function to builtins.fetch if it downloads arbitrary things, even non-tree ones.

Reply (Member Author): Might be an idea, indeed :) I’d leave that for later though if you’re OK with that.


In URL form, the scheme must be `file+http://`, `file+https://` or `file+file://`.
If the extension doesn’t correspond to a known archive format (as defined by the
`tarball` fetcher), then the `file+` prefix can be dropped.
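
  Correspondingly, a sketch of the `file` forms (hypothetical URLs):

  ```nix
  {
    # Explicit `file+` application scheme: kept as an opaque file even
    # though the path has an archive extension.
    inputs.opaque = {
      url = "file+https://example.org/data.tar.gz";
      flake = false;
    };
    # No known archive extension, so the `file+` prefix can be dropped.
    inputs.plain = {
      url = "https://example.org/data.txt";
      flake = false;
    };
    outputs = { ... }: { };
  }
  ```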

* `github`: A more efficient way to fetch repositories from
GitHub. The following attributes are required:
Expand Down
105 changes: 105 additions & 0 deletions tests/fetchTree-file.sh
@@ -0,0 +1,105 @@
source common.sh

clearStore

cd "$TEST_ROOT"

test_fetch_file () {
echo foo > test_input

input_hash="$(nix hash path test_input)"

nix eval --impure --file - <<EOF
let
tree = builtins.fetchTree { type = "file"; url = "file://$PWD/test_input"; };
in
assert (tree.narHash == "$input_hash");
tree
EOF
}

# Make sure that `http(s)` and `file` flake inputs are properly extracted when
# they should be, and treated as opaque files when they should be
test_file_flake_input () {
rm -fr "$TEST_ROOT/testFlake";
mkdir "$TEST_ROOT/testFlake";
pushd testFlake

mkdir inputs
echo foo > inputs/test_input_file
tar cfa test_input.tar.gz inputs
cp test_input.tar.gz test_input_no_ext
input_tarball_hash="$(nix hash path test_input.tar.gz)"
input_directory_hash="$(nix hash path inputs)"

cat <<EOF > flake.nix
{
inputs.no_ext_default_no_unpack = {
url = "file://$PWD/test_input_no_ext";
flake = false;
};
inputs.no_ext_explicit_unpack = {
url = "tarball+file://$PWD/test_input_no_ext";
flake = false;
};
inputs.tarball_default_unpack = {
url = "file://$PWD/test_input.tar.gz";
flake = false;
};
inputs.tarball_explicit_no_unpack = {
url = "file+file://$PWD/test_input.tar.gz";
flake = false;
};
outputs = { ... }: {};
}
EOF

nix flake update
nix eval --file - <<EOF
with (builtins.fromJSON (builtins.readFile ./flake.lock));

# URL inputs whose extension doesn’t match a known archive format should
# not be unpacked by default
assert (nodes.no_ext_default_no_unpack.locked.type == "file");
assert (nodes.no_ext_default_no_unpack.locked.unpack or false == false);
assert (nodes.no_ext_default_no_unpack.locked.narHash == "$input_tarball_hash");

# For backwards compatibility, flake inputs that correspond to the
# old 'tarball' fetcher should still have their type set to 'tarball'
assert (nodes.tarball_default_unpack.locked.type == "tarball");
# Unless explicitly specified, the 'unpack' parameter shouldn’t appear here
# because that would break older Nix versions
assert (!nodes.tarball_default_unpack.locked ? unpack);
assert (nodes.tarball_default_unpack.locked.narHash == "$input_directory_hash");

# Explicitly passing the unpack parameter should enforce the desired behavior
assert (nodes.no_ext_explicit_unpack.locked.narHash == nodes.tarball_default_unpack.locked.narHash);
assert (nodes.tarball_explicit_no_unpack.locked.narHash == nodes.no_ext_default_no_unpack.locked.narHash);
true
EOF
popd

[[ -z "${NIX_DAEMON_PACKAGE}" ]] && return 0

# Ensure that a lockfile generated by the current Nix for tarball inputs
# can still be read by an older Nix

# Note: "\${tarball}" is escaped below so that the shell doesn't expand it
# and the interpolation is left for Nix to perform
cat <<EOF > flake.nix
{
inputs.tarball = {
url = "file://$PWD/test_input.tar.gz";
flake = false;
};
outputs = { self, tarball }: {
foo = builtins.readFile "\${tarball}/test_input_file";
};
}
EOF

nix flake update

clearStore

"$NIX_DAEMON_PACKAGE/bin/nix" eval .#foo
}

test_fetch_file
test_file_flake_input
1 change: 1 addition & 0 deletions tests/local.mk
@@ -23,6 +23,7 @@ nix_tests = \
fetchGit.sh \
fetchurl.sh \
fetchPath.sh \
fetchTree-file.sh \
simple.sh \
referrers.sh \
optimise-store.sh \