Skip to content

Commit 89a8955

Browse files
committed
Merge remote-tracking branch 'origin/file-fetcher'
2 parents 2f8a34c + 5b8c1de commit 89a8955

File tree

7 files changed

+221
-25
lines changed

7 files changed

+221
-25
lines changed

doc/manual/src/release-notes/rl-next.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@
2424

2525
Selecting derivation outputs using the attribute selection syntax
2626
(e.g. `nixpkgs#glibc.dev`) no longer works.
27+
28+
* `builtins.fetchTree` (and flake inputs) can now be used to fetch plain files
29+
over the `http(s)` and `file` protocols in addition to directory tarballs.

src/libfetchers/tarball.cc

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "archive.hh"
77
#include "tarfile.hh"
88
#include "types.hh"
9+
#include "split.hh"
910

1011
namespace nix::fetchers {
1112

@@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball(
168169
};
169170
}
170171

171-
struct TarballInputScheme : InputScheme
172+
// An input scheme corresponding to a curl-able resource
173+
struct CurlInputScheme : InputScheme
172174
{
173-
std::optional<Input> inputFromURL(const ParsedURL & url) override
175+
virtual const std::string inputType() const = 0;
176+
const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};
177+
178+
const bool hasTarballExtension(std::string_view path) const
174179
{
175-
if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {};
180+
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar")
181+
|| hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz")
182+
|| hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
183+
|| hasSuffix(path, ".tar.zst");
184+
}
176185

177-
if (!hasSuffix(url.path, ".zip")
178-
&& !hasSuffix(url.path, ".tar")
179-
&& !hasSuffix(url.path, ".tgz")
180-
&& !hasSuffix(url.path, ".tar.gz")
181-
&& !hasSuffix(url.path, ".tar.xz")
182-
&& !hasSuffix(url.path, ".tar.bz2")
183-
&& !hasSuffix(url.path, ".tar.zst"))
184-
return {};
186+
virtual bool isValidURL(const ParsedURL & url) const = 0;
187+
188+
std::optional<Input> inputFromURL(const ParsedURL & url) override
189+
{
190+
if (!isValidURL(url))
191+
return std::nullopt;
185192

186193
Input input;
187-
input.attrs.insert_or_assign("type", "tarball");
188-
input.attrs.insert_or_assign("url", url.to_string());
194+
195+
auto urlWithoutApplicationScheme = url;
196+
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;
197+
198+
input.attrs.insert_or_assign("type", inputType());
199+
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
189200
auto narHash = url.query.find("narHash");
190201
if (narHash != url.query.end())
191202
input.attrs.insert_or_assign("narHash", narHash->second);
@@ -194,29 +205,27 @@ struct TarballInputScheme : InputScheme
194205

195206
std::optional<Input> inputFromAttrs(const Attrs & attrs) override
196207
{
197-
if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
208+
auto type = maybeGetStrAttr(attrs, "type");
209+
if (type != inputType()) return {};
198210

211+
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
199212
for (auto & [name, value] : attrs)
200-
if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name")
201-
throw Error("unsupported tarball input attribute '%s'", name);
213+
if (!allowedNames.count(name))
214+
throw Error("unsupported %s input attribute '%s'", *type, name);
202215

203216
Input input;
204217
input.attrs = attrs;
218+
205219
//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
206220
return input;
207221
}
208222

209223
ParsedURL toURL(const Input & input) override
210224
{
211225
auto url = parseURL(getStrAttr(input.attrs, "url"));
212-
// NAR hashes are preferred over file hashes since tar/zip files
213-
// don't have a canonical representation.
226+
// NAR hashes are preferred over file hashes since tar/zip files
// don't have a canonical representation.
214227
if (auto narHash = input.getNarHash())
215228
url.query.insert_or_assign("narHash", narHash->to_string(SRI, true));
216-
/*
217-
else if (auto hash = maybeGetStrAttr(input.attrs, "hash"))
218-
url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true));
219-
*/
220229
return url;
221230
}
222231

@@ -225,6 +234,42 @@ struct TarballInputScheme : InputScheme
225234
return true;
226235
}
227236

237+
};
238+
239+
struct FileInputScheme : CurlInputScheme
240+
{
241+
const std::string inputType() const override { return "file"; }
242+
243+
bool isValidURL(const ParsedURL & url) const override
244+
{
245+
auto parsedUrlScheme = parseUrlScheme(url.scheme);
246+
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
247+
&& (parsedUrlScheme.application
248+
? parsedUrlScheme.application.value() == inputType()
249+
: !hasTarballExtension(url.path));
250+
}
251+
252+
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
253+
{
254+
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
255+
return {std::move(file.storePath), input};
256+
}
257+
};
258+
259+
struct TarballInputScheme : CurlInputScheme
260+
{
261+
const std::string inputType() const override { return "tarball"; }
262+
263+
bool isValidURL(const ParsedURL & url) const override
264+
{
265+
auto parsedUrlScheme = parseUrlScheme(url.scheme);
266+
267+
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
268+
&& (parsedUrlScheme.application
269+
? parsedUrlScheme.application.value() == inputType()
270+
: hasTarballExtension(url.path));
271+
}
272+
228273
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
229274
{
230275
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
@@ -233,5 +278,6 @@ struct TarballInputScheme : InputScheme
233278
};
234279

235280
static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
281+
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });
236282

237283
}

src/libutil/url.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "url.hh"
22
#include "url-parts.hh"
33
#include "util.hh"
4+
#include "split.hh"
45

56
namespace nix {
67

@@ -136,4 +137,21 @@ bool ParsedURL::operator ==(const ParsedURL & other) const
136137
&& fragment == other.fragment;
137138
}
138139

140+
/**
141+
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
142+
* into a tuple '(applicationScheme, transportScheme)'
143+
*
144+
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
145+
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
146+
*/
147+
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
148+
{
149+
auto application = splitPrefixTo(scheme, '+');
150+
auto transport = scheme;
151+
return ParsedUrlScheme {
152+
.application = application,
153+
.transport = transport,
154+
};
155+
}
156+
139157
}

src/libutil/url.hh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,19 @@ std::map<std::string, std::string> decodeQuery(const std::string & query);
2727

2828
ParsedURL parseURL(const std::string & url);
2929

30+
/*
31+
* Although that’s not really standardized anywhere, a number of tools
32+
* use a scheme of the form 'x+y' in urls, where y is the “transport layer”
33+
* scheme, and x is the “application layer” scheme.
34+
*
35+
* For example git uses `git+https` to designate remotes using a Git
36+
* protocol over http.
37+
*/
38+
// Result of splitting an 'x+y' URL scheme. Both members are std::string_view
// based and therefore do not own the characters they refer to.
struct ParsedUrlScheme {
39+
std::optional<std::string_view> application; // the 'x' of 'x+y' (e.g. `git` in `git+https`); unset when there is no application prefix
40+
std::string_view transport; // the 'y' of 'x+y', or the whole scheme when there is no application prefix
41+
};
42+
43+
ParsedUrlScheme parseUrlScheme(std::string_view scheme);
44+
3045
}

src/nix/flake.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,17 @@ Currently the `type` attribute can be one of the following:
181181
* `tarball`: Tarballs. The location of the tarball is specified by the
182182
attribute `url`.
183183

184-
In URL form, the schema must be `http://`, `https://` or `file://`
185-
URLs and the extension must be `.zip`, `.tar`, `.tgz`, `.tar.gz`,
186-
`.tar.xz`, `.tar.bz2` or `.tar.zst`.
184+
In URL form, the scheme must be `tarball+http://`, `tarball+https://` or `tarball+file://`.
185+
If the extension corresponds to a known archive format (`.zip`, `.tar`,
186+
`.tgz`, `.tar.gz`, `.tar.xz`, `.tar.bz2` or `.tar.zst`), then the `tarball+`
187+
can be dropped.
188+
189+
* `file`: Plain files or directory tarballs, either over http(s) or from the local
190+
disk.
191+
192+
In URL form, the scheme must be `file+http://`, `file+https://` or `file+file://`.
193+
If the extension doesn’t correspond to a known archive format (as defined by the
194+
`tarball` fetcher), then the `file+` prefix can be dropped.
187195

188196
* `github`: A more efficient way to fetch repositories from
189197
GitHub. The following attributes are required:

tests/fetchTree-file.sh

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
source common.sh
2+
3+
clearStore
4+
5+
cd "$TEST_ROOT"
6+
7+
# Fetch a local plain file through `builtins.fetchTree` with `type = "file"`
# and assert that the narHash of the resulting tree equals the output of
# `nix hash path` for that same file.
test_fetch_file () {
8+
echo foo > test_input
9+
10+
input_hash="$(nix hash path test_input)"
11+
12+
nix eval --impure --file - <<EOF
13+
let
14+
tree = builtins.fetchTree { type = "file"; url = "file://$PWD/test_input"; };
15+
in
16+
assert (tree.narHash == "$input_hash");
17+
tree
18+
EOF
19+
}
20+
21+
# Make sure that `http(s)` and `file` flake inputs are properly extracted when
# they should be, and treated as opaque files when they should be.
#
# When NIX_DAEMON_PACKAGE is set, additionally check that a lock file written
# by the current Nix for a tarball input can still be evaluated by the Nix
# found in that package.
test_file_flake_input () {
    rm -fr "$TEST_ROOT/testFlake";
    mkdir "$TEST_ROOT/testFlake";
    pushd testFlake

    # The same archive is made available with and without a recognisable
    # archive extension.
    mkdir inputs
    echo foo > inputs/test_input_file
    tar cfa test_input.tar.gz inputs
    cp test_input.tar.gz test_input_no_ext
    input_tarball_hash="$(nix hash path test_input.tar.gz)"
    input_directory_hash="$(nix hash path inputs)"

    cat <<EOF > flake.nix
{
    inputs.no_ext_default_no_unpack = {
        url = "file://$PWD/test_input_no_ext";
        flake = false;
    };
    inputs.no_ext_explicit_unpack = {
        url = "tarball+file://$PWD/test_input_no_ext";
        flake = false;
    };
    inputs.tarball_default_unpack = {
        url = "file://$PWD/test_input.tar.gz";
        flake = false;
    };
    inputs.tarball_explicit_no_unpack = {
        url = "file+file://$PWD/test_input.tar.gz";
        flake = false;
    };
    outputs = { ... }: {};
}
EOF

    nix flake update
    nix eval --file - <<EOF
    with (builtins.fromJSON (builtins.readFile ./flake.lock));

    # Url inputs whose extension doesn’t match a know archive format should
    # not be unpacked by default
    assert (nodes.no_ext_default_no_unpack.locked.type == "file");
    assert (nodes.no_ext_default_no_unpack.locked.unpack or false == false);
    assert (nodes.no_ext_default_no_unpack.locked.narHash == "$input_tarball_hash");

    # For backwards compatibility, flake inputs that correspond to the
    # old 'tarball' fetcher should still have their type set to 'tarball'
    assert (nodes.tarball_default_unpack.locked.type == "tarball");
    # Unless explicitely specified, the 'unpack' parameter shouldn’t appear here
    # because that would break older Nix versions
    assert (nodes.tarball_default_unpack.locked ? unpack);
    assert (nodes.tarball_default_unpack.locked.narHash == "$input_directory_hash");

    # Explicitely passing the unpack parameter should enforce the desired behavior
    assert (nodes.no_ext_explicit_unpack.locked.narHash == nodes.tarball_default_unpack.locked.narHash);
    assert (nodes.tarball_explicit_no_unpack.locked.narHash == nodes.no_ext_default_no_unpack.locked.narHash);
    true
EOF

    # Stay inside testFlake for the compatibility check: the flake below
    # refers to $PWD/test_input.tar.gz, which only exists in this directory.
    if [[ -n "${NIX_DAEMON_PACKAGE:-}" ]]; then
        # Ensure that a lockfile generated by the current Nix for tarball inputs
        # can still be read by an older Nix

        # The heredoc must end right after the flake body so that the commands
        # below are executed rather than written into flake.nix. `\${tarball}`
        # is escaped because it is a Nix interpolation, not a shell variable.
        cat <<EOF > flake.nix
{
    inputs.tarball = {
        url = "file://$PWD/test_input.tar.gz";
        flake = false;
    };
    outputs = { self, tarball }: {
        foo = builtins.readFile "\${tarball}/test_input_file";
    };
}
EOF
        nix flake update

        clearStore
        "$NIX_DAEMON_PACKAGE/bin/nix" eval .#foo
    fi

    popd
}
103+
104+
test_fetch_file
105+
test_file_flake_input

tests/local.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ nix_tests = \
2323
fetchGit.sh \
2424
fetchurl.sh \
2525
fetchPath.sh \
26+
fetchTree-file.sh \
2627
simple.sh \
2728
referrers.sh \
2829
optimise-store.sh \

0 commit comments

Comments
 (0)