3 changes: 3 additions & 0 deletions doc/manual/src/release-notes/rl-next.md
@@ -24,3 +24,6 @@

Selecting derivation outputs using the attribute selection syntax
(e.g. `nixpkgs#glibc.dev`) no longer works.

* `builtins.fetchTree` (and flake inputs) can now be used to fetch plain files
over the `http(s)` and `file` protocols in addition to directory tarballs.
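
  For example (a minimal sketch; the URL is a placeholder):

  ```nix
  # Fetch a single file over HTTPS; the result is an opaque file
  # rather than an unpacked source tree.
  builtins.fetchTree {
    type = "file";
    url = "https://example.org/hello.txt";
  }
  ```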
90 changes: 68 additions & 22 deletions src/libfetchers/tarball.cc
@@ -6,6 +6,7 @@
#include "archive.hh"
#include "tarfile.hh"
#include "types.hh"
#include "split.hh"

namespace nix::fetchers {

@@ -168,24 +169,34 @@ std::pair<Tree, time_t> downloadTarball(
};
}

struct TarballInputScheme : InputScheme
// An input scheme corresponding to a curl-able resource
struct CurlInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url) override
virtual const std::string inputType() const = 0;
const std::set<std::string> transportUrlSchemes = {"file", "http", "https"};

const bool hasTarballExtension(std::string_view path) const
{
if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return {};
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar")
|| hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz")
|| hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|| hasSuffix(path, ".tar.zst");
}

if (!hasSuffix(url.path, ".zip")
&& !hasSuffix(url.path, ".tar")
&& !hasSuffix(url.path, ".tgz")
&& !hasSuffix(url.path, ".tar.gz")
&& !hasSuffix(url.path, ".tar.xz")
&& !hasSuffix(url.path, ".tar.bz2")
&& !hasSuffix(url.path, ".tar.zst"))
return {};
virtual bool isValidURL(const ParsedURL & url) const = 0;

std::optional<Input> inputFromURL(const ParsedURL & url) override
{
if (!isValidURL(url))
return std::nullopt;

Input input;
input.attrs.insert_or_assign("type", "tarball");
input.attrs.insert_or_assign("url", url.to_string());

auto urlWithoutApplicationScheme = url;
urlWithoutApplicationScheme.scheme = parseUrlScheme(url.scheme).transport;

input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", urlWithoutApplicationScheme.to_string());
auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
@@ -194,29 +205,27 @@ struct TarballInputScheme : InputScheme

std::optional<Input> inputFromAttrs(const Attrs & attrs) override
{
if (maybeGetStrAttr(attrs, "type") != "tarball") return {};
auto type = maybeGetStrAttr(attrs, "type");
if (type != inputType()) return {};

std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack"};
for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && /* name != "hash" && */ name != "narHash" && name != "name")
throw Error("unsupported tarball input attribute '%s'", name);
if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name);

Input input;
input.attrs = attrs;

//input.locked = (bool) maybeGetStrAttr(input.attrs, "hash");
return input;
}

ParsedURL toURL(const Input & input) override
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
// NAR hashes are preferred over file hashes since tar/zip files
// don't have a canonical representation.
if (auto narHash = input.getNarHash())
url.query.insert_or_assign("narHash", narHash->to_string(SRI, true));
/*
else if (auto hash = maybeGetStrAttr(input.attrs, "hash"))
url.query.insert_or_assign("hash", Hash(*hash).to_string(SRI, true));
*/
return url;
}

@@ -225,6 +234,42 @@
return true;
}

};

struct FileInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "file"; }

bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: !hasTarballExtension(url.path));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
Review comment (Member): A problem with the use of downloadFile() is that it uses FileIngestionMethod::Flat instead of FileIngestionMethod::Recursive. Currently it's assumed that all flake inputs use recursive+sha256 with a name of "source". This allows inputs to be substituted using the narHash attribute in the lock file (see Input::computeStorePath()). However, the lazy trees branch will probably remove the ability to substitute inputs anyway...

Reply (Member Author): Oh indeed, I didn’t notice the Flat here. Would it be an issue to make downloadFile use Recursive? Afaik none of the other use sites really care about how the result is hashed since it’s only used internally by the fetchers but doesn’t leak outside.

(Alternatively I could just wrap that by adding an extra step that would copy the output to a Recursive CA location, but if that can be avoided it’s even better.)

return {std::move(file.storePath), input};
}
};

struct TarballInputScheme : CurlInputScheme
{
const std::string inputType() const override { return "tarball"; }

bool isValidURL(const ParsedURL & url) const override
{
auto parsedUrlScheme = parseUrlScheme(url.scheme);

return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application
? parsedUrlScheme.application.value() == inputType()
: hasTarballExtension(url.path));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
{
auto tree = downloadTarball(store, getStrAttr(input.attrs, "url"), input.getName(), false).first;
@@ -233,5 +278,6 @@ struct TarballInputScheme
};

static auto rTarballInputScheme = OnStartup([] { registerInputScheme(std::make_unique<TarballInputScheme>()); });
static auto rFileInputScheme = OnStartup([] { registerInputScheme(std::make_unique<FileInputScheme>()); });

}
18 changes: 18 additions & 0 deletions src/libutil/url.cc
@@ -1,6 +1,7 @@
#include "url.hh"
#include "url-parts.hh"
#include "util.hh"
#include "split.hh"

namespace nix {

@@ -136,4 +137,21 @@ bool ParsedURL::operator ==(const ParsedURL & other) const
&& fragment == other.fragment;
}

/**
* Parse a URL scheme of the form '(applicationScheme\+)?transportScheme'
* into a tuple '(applicationScheme, transportScheme)'
*
* > parseUrlScheme("http") == ParsedUrlScheme{ {}, "http"}
* > parseUrlScheme("tarball+http") == ParsedUrlScheme{ {"tarball"}, "http"}
*/
ParsedUrlScheme parseUrlScheme(std::string_view scheme)
{
auto application = splitPrefixTo(scheme, '+');
auto transport = scheme;
return ParsedUrlScheme {
.application = application,
.transport = transport,
};
}

}
15 changes: 15 additions & 0 deletions src/libutil/url.hh
@@ -27,4 +27,19 @@ std::map<std::string, std::string> decodeQuery(const std::string & query);

ParsedURL parseURL(const std::string & url);

/*
* Although this isn’t really standardized anywhere, a number of tools
* use a scheme of the form 'x+y' in URLs, where y is the “transport layer”
* scheme, and x is the “application layer” scheme.
*
* For example, Git uses `git+https` to designate remotes using the Git
* protocol over HTTPS.
*/
struct ParsedUrlScheme {
std::optional<std::string_view> application;
std::string_view transport;
};

ParsedUrlScheme parseUrlScheme(std::string_view scheme);

}
14 changes: 11 additions & 3 deletions src/nix/flake.md
@@ -181,9 +181,17 @@ Currently the `type` attribute can be one of the following:
* `tarball`: Tarballs. The location of the tarball is specified by the
attribute `url`.

In URL form, the scheme must be `http://`, `https://` or `file://`
and the extension must be `.zip`, `.tar`, `.tgz`, `.tar.gz`,
`.tar.xz`, `.tar.bz2` or `.tar.zst`.
In URL form, the scheme must be `tarball+http://`, `tarball+https://` or `tarball+file://`.
If the extension corresponds to a known archive format (`.zip`, `.tar`,
`.tgz`, `.tar.gz`, `.tar.xz`, `.tar.bz2` or `.tar.zst`), then the `tarball+`
prefix can be dropped.
Review comment on lines +184 to +187 (Member): Isn't that change breaking backwards compatibility?

Reply (Member Author): Nope, on the contrary: currently http(s)/file URLs are required to have such an extension and are treated as tarballs. The new behavior makes it so that what was already accepted still is, with the same semantics, and the rest is treated as file URLs.

Follow-up (Member Author): The problematic bit, though, is that adding a new tarball extension would break backwards compat a bit: with this change, https://foo.com/bar.tar.lzma is fetched as a plain file, and if we wanted to add .tar.lzma as a recognized archive extension, it would become a directory tarball.

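For illustration, a minimal flake exercising both URL forms of the `tarball` fetcher (hypothetical URLs; the behavior follows the rules above):

```nix
{
  # Explicit `tarball+` application scheme: unpacked even without a known
  # archive extension.
  inputs.explicit = {
    url = "tarball+https://example.org/source";
    flake = false;
  };
  # Known archive extension, so the `tarball+` prefix can be dropped.
  inputs.implicit = {
    url = "https://example.org/source.tar.gz";
    flake = false;
  };
  outputs = { ... }: { };
}
```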

* `file`: Plain files or directory tarballs, either over http(s) or from the local
disk.
Review comment (Member): Might as well rename the function to builtins.fetch if it downloads arbitrary things, even non-tree ones.

Reply (Member Author): Might be an idea, indeed :) I’d leave that for later though if you’re OK with that.


In URL form, the scheme must be `file+http://`, `file+https://` or `file+file://`.
If the extension doesn’t correspond to a known archive format (as defined by the
`tarball` fetcher), then the `file+` prefix can be dropped.
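
  Correspondingly, a sketch of the `file` forms (hypothetical URLs):

  ```nix
  {
    # Explicit `file+` application scheme: kept as an opaque file even
    # though the path has an archive extension.
    inputs.opaque = {
      url = "file+https://example.org/data.tar.gz";
      flake = false;
    };
    # No known archive extension, so the `file+` prefix can be dropped.
    inputs.plain = {
      url = "https://example.org/data.txt";
      flake = false;
    };
    outputs = { ... }: { };
  }
  ```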

* `github`: A more efficient way to fetch repositories from
GitHub. The following attributes are required:
Expand Down
105 changes: 105 additions & 0 deletions tests/fetchTree-file.sh
@@ -0,0 +1,105 @@
source common.sh

clearStore

cd "$TEST_ROOT"

test_fetch_file () {
echo foo > test_input

input_hash="$(nix hash path test_input)"

nix eval --impure --file - <<EOF
let
tree = builtins.fetchTree { type = "file"; url = "file://$PWD/test_input"; };
in
assert (tree.narHash == "$input_hash");
tree
EOF
}

# Make sure that `http(s)` and `file` flake inputs are properly extracted when
# they should be, and treated as opaque files when they should be
test_file_flake_input () {
rm -fr "$TEST_ROOT/testFlake";
mkdir "$TEST_ROOT/testFlake";
pushd testFlake

mkdir inputs
echo foo > inputs/test_input_file
tar cfa test_input.tar.gz inputs
cp test_input.tar.gz test_input_no_ext
input_tarball_hash="$(nix hash path test_input.tar.gz)"
input_directory_hash="$(nix hash path inputs)"

cat <<EOF > flake.nix
{
inputs.no_ext_default_no_unpack = {
url = "file://$PWD/test_input_no_ext";
flake = false;
};
inputs.no_ext_explicit_unpack = {
url = "tarball+file://$PWD/test_input_no_ext";
flake = false;
};
inputs.tarball_default_unpack = {
url = "file://$PWD/test_input.tar.gz";
flake = false;
};
inputs.tarball_explicit_no_unpack = {
url = "file+file://$PWD/test_input.tar.gz";
flake = false;
};
outputs = { ... }: {};
}
EOF

nix flake update
nix eval --file - <<EOF
with (builtins.fromJSON (builtins.readFile ./flake.lock));

# URL inputs whose extension doesn’t match a known archive format should
# not be unpacked by default
assert (nodes.no_ext_default_no_unpack.locked.type == "file");
assert (nodes.no_ext_default_no_unpack.locked.unpack or false == false);
assert (nodes.no_ext_default_no_unpack.locked.narHash == "$input_tarball_hash");

# For backwards compatibility, flake inputs that correspond to the
# old 'tarball' fetcher should still have their type set to 'tarball'
assert (nodes.tarball_default_unpack.locked.type == "tarball");
# Unless explicitly specified, the 'unpack' parameter shouldn’t appear here
# because that would break older Nix versions
assert (!nodes.tarball_default_unpack.locked ? unpack);
assert (nodes.tarball_default_unpack.locked.narHash == "$input_directory_hash");

# Explicitly passing the unpack parameter should enforce the desired behavior
assert (nodes.no_ext_explicit_unpack.locked.narHash == nodes.tarball_default_unpack.locked.narHash);
assert (nodes.tarball_explicit_no_unpack.locked.narHash == nodes.no_ext_default_no_unpack.locked.narHash);
true
EOF
popd

[[ -z "${NIX_DAEMON_PACKAGE}" ]] && return 0

# Ensure that a lockfile generated by the current Nix for tarball inputs
# can still be read by an older Nix

# Note: "\${tarball}" is escaped below so that the shell doesn't expand it
# and the interpolation is left for Nix to perform
cat <<EOF > flake.nix
{
inputs.tarball = {
url = "file://$PWD/test_input.tar.gz";
flake = false;
};
outputs = { self, tarball }: {
foo = builtins.readFile "\${tarball}/test_input_file";
};
}
EOF

nix flake update

clearStore

"$NIX_DAEMON_PACKAGE/bin/nix" eval .#foo
}

test_fetch_file
test_file_flake_input
1 change: 1 addition & 0 deletions tests/local.mk
@@ -23,6 +23,7 @@ nix_tests = \
fetchGit.sh \
fetchurl.sh \
fetchPath.sh \
fetchTree-file.sh \
simple.sh \
referrers.sh \
optimise-store.sh \