From a00cefbe2f5d87b485f5baa700e6a31f07189715 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Fri, 7 Nov 2025 13:06:59 +0100
Subject: [PATCH 1/4] arg: add --cache-list argument to list cached models

---
 common/arg.cpp      | 15 +++++++++++++++
 common/common.cpp   | 33 +++++++++++++++++++++++++++++++++
 common/common.h     |  7 +++++++
 common/download.cpp | 38 +++++++++++++++++++++++++++++++++-----
 common/download.h   | 18 ++++++++++++++----
 5 files changed, 102 insertions(+), 9 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 5597de121c132..e5b587a85652d 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -740,6 +740,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"-cl", "--cache-list"},
+        "show list of models in cache",
+        [](common_params &) {
+            printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
+            auto models = common_list_cached_models();
+            printf("number of models in cache: %zu\n", models.size());
+            for (size_t i = 0; i < models.size(); i++) {
+                auto & model = models[i];
+                auto num = std::to_string(i+1); // so that we can print trailing space
+                printf("%4s. %-50s   tag: %s\n", num.c_str(), model.name.c_str(), model.tag.c_str());
+            }
+            exit(0);
+        }
+    ));
     add_opt(common_arg(
         {"--completion-bash"},
         "print source-able bash completion script for llama.cpp",
diff --git a/common/common.cpp b/common/common.cpp
index b0591e84b0668..a8d709ab1d050 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }
 
+// list regular files directly inside `path` (non-recursive)
+// returns an empty list if `path` is empty, missing, not a directory or unreadable
+// note: only std::error_code overloads are used, so filesystem errors never throw
+std::vector<common_file_info> fs_list_files(const std::string & path) {
+    std::vector<common_file_info> files;
+    if (path.empty()) {
+        return files;
+    }
+
+    std::error_code ec;
+    const std::filesystem::path dir(path);
+    std::filesystem::directory_iterator it(dir, ec);
+    if (ec) {
+        return files;
+    }
+
+    for (const std::filesystem::directory_iterator end; it != end; it.increment(ec)) {
+        const auto & p = it->path();
+        // only include regular files (skip directories and entries we cannot inspect)
+        if (!std::filesystem::is_regular_file(p, ec)) {
+            continue;
+        }
+        common_file_info info;
+        info.path = p.string();
+        info.name = p.filename().string();
+        const auto size = std::filesystem::file_size(p, ec);
+        info.size = ec ? 0 : static_cast<size_t>(size); // keep 0 when the size is unreadable
+        files.push_back(std::move(info));
+    }
+
+    return files;
+}
+
 
 //
 // Model utils
diff --git a/common/common.h b/common/common.h
index 54b7849b17448..8540725aaa476 100644
--- a/common/common.h
+++ b/common/common.h
@@ -611,6 +611,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 std::string fs_get_cache_directory();
 std::string fs_get_cache_file(const std::string & filename);
 
+struct common_file_info { // one regular file, as returned by fs_list_files()
+    std::string path; // path of the file inside the listed directory
+    std::string name; // file name component of `path` only
+    size_t      size = 0; // in bytes (0 if the size could not be read)
+};
+std::vector<common_file_info> fs_list_files(const std::string & path);
+
 //
 // Model utils
 //
diff --git a/common/download.cpp b/common/download.cpp
index 02d75fc0d0958..7a7aaeebb0071 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -50,6 +50,14 @@ using json = nlohmann::ordered_json;
 // downloader
 //
 
+static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
+    // we use "=" to avoid clashing with other component, while still being allowed on windows
+    std::string fname = "manifest=" + repo + "=" + tag + ".json";
+    string_replace_all(fname, "/", "_");
+    string_replace_all(fname, "\\", "_");
+    return fs_get_cache_file(fname);
+}
+
 static std::string read_file(const std::string & fname) {
     std::ifstream file(fname);
     if (!file) {
@@ -829,17 +837,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
     // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
     // User-Agent header is already set in common_remote_get_content, no need to set it here
 
-    // we use "=" to avoid clashing with other component, while still being allowed on windows
-    std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
-    string_replace_all(cached_response_fname, "/", "_");
-    std::string cached_response_path = fs_get_cache_file(cached_response_fname);
-
     // make the request
     common_remote_params params;
     params.headers = headers;
     long res_code = 0;
     std::string res_str;
     bool use_cache = false;
+    std::string cached_response_path = get_manifest_path(hf_repo, tag);
     if (!offline) {
         try {
             auto res = common_remote_get_content(url, params);
@@ -895,6 +899,29 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
     return { hf_repo, ggufFile, mmprojFile };
 }
 
+std::vector<common_cached_model_info> common_list_cached_models() {
+    std::vector<common_cached_model_info> models;
+    const std::string cache_dir = fs_get_cache_directory();
+    const std::vector<common_file_info> files = fs_list_files(cache_dir);
+    for (const auto & file : files) {
+        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
+            common_cached_model_info model_info;
+            model_info.manifest_path = file.path;
+            std::string fname = file.name;
+            string_replace_all(fname, ".json", ""); // remove extension
+            auto parts = string_split<std::string>(fname, '=');
+            if (parts.size() != 3) {
+                continue;
+            }
+            model_info.name = parts[1];
+            model_info.tag  = parts[2];
+            model_info.size = 0; // TODO: get GGUF size, not manifest size
+            models.push_back(model_info);
+        }
+    }
+    return models;
+}
+
 //
 // Docker registry functions
 //
@@ -959,6 +986,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
         std::string token = common_docker_get_token(repo);  // Get authentication token
 
         // Get manifest
+        // TODO: cache the manifest response so that it appears in the model list
         const std::string    url_prefix = "https://registry-1.docker.io/v2/" + repo;
         std::string          manifest_url = url_prefix + "/manifests/" + tag;
         common_remote_params manifest_params;
diff --git a/common/download.h b/common/download.h
index ddf36155ef818..307ee56ecb1f3 100644
--- a/common/download.h
+++ b/common/download.h
@@ -8,16 +8,19 @@ struct common_params_model;
 // download functionalities
 //
 
+struct common_cached_model_info {
+    std::string manifest_path;
+    std::string name; // note: this is not "repo", slashes are replaced with underscores
+    std::string tag;
+    size_t      size = 0; // GGUF size in bytes
+};
+
 struct common_hf_file_res {
     std::string repo; // repo name with ":tag" removed
     std::string ggufFile;
     std::string mmprojFile;
 };
 
-// resolve and download model from Docker registry
-// return local path to downloaded model file
-std::string common_docker_resolve_model(const std::string & docker);
-
 /**
  * Allow getting the HF file from the HF repo with tag (like ollama), for example:
  * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -39,3 +42,10 @@ bool common_download_model(
     const common_params_model & model,
     const std::string & bearer_token,
     bool offline);
+
+// returns list of cached models
+std::vector<common_cached_model_info> common_list_cached_models();
+
+// resolve and download model from Docker registry
+// return local path to downloaded model file
+std::string common_docker_resolve_model(const std::string & docker);

From 12463c4f25a06cd7492b80fcf63fb44bbbbc25ca Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Fri, 7 Nov 2025 16:05:26 +0100
Subject: [PATCH 2/4] new manifest naming format

---
 common/arg.cpp      |  2 +-
 common/download.cpp | 22 +++++++++++++++++-----
 common/download.h   |  6 +++++-
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index e5b587a85652d..e84ef25737dd1 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -750,7 +750,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             for (size_t i = 0; i < models.size(); i++) {
                 auto & model = models[i];
                 auto num = std::to_string(i+1); // so that we can print trailing space
-                printf("%4s. %-50s   tag: %s\n", num.c_str(), model.name.c_str(), model.tag.c_str());
+                printf("%4s. %s\n", num.c_str(), model.to_string().c_str());
             }
             exit(0);
         }
diff --git a/common/download.cpp b/common/download.cpp
index 7a7aaeebb0071..a2f3cfa7e316a 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -50,11 +50,19 @@ using json = nlohmann::ordered_json;
 // downloader
 //
 
+// returns true iff repo is "owner/repo": exactly one '/', both parts non-empty and made of [A-Za-z0-9_.-]
+static bool validate_repo_name(const std::string & repo) {
+    static const std::regex repo_regex(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)");
+    return std::regex_match(repo, repo_regex);
+}
+
 static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
     // we use "=" to avoid clashing with other component, while still being allowed on windows
     std::string fname = "manifest=" + repo + "=" + tag + ".json";
-    string_replace_all(fname, "/", "_");
-    string_replace_all(fname, "\\", "_");
+    if (!validate_repo_name(repo)) {
+        throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
+    }
+    string_replace_all(fname, "/", "=");
     return fs_get_cache_file(fname);
 }
 
@@ -910,11 +918,15 @@ std::vector<common_cached_model_info> common_list_cached_models() {
             std::string fname = file.name;
             string_replace_all(fname, ".json", ""); // remove extension
             auto parts = string_split<std::string>(fname, '=');
-            if (parts.size() != 3) {
+            if (parts.size() == 4) {
+                // expect format: manifest=<user>=<repo>=<tag>=<other>
+                model_info.user = parts[1];
+                model_info.repo = parts[2];
+                model_info.tag  = parts[3];
+            } else {
+                // invalid format
                 continue;
             }
-            model_info.name = parts[1];
-            model_info.tag  = parts[2];
             model_info.size = 0; // TODO: get GGUF size, not manifest size
             models.push_back(model_info);
         }
diff --git a/common/download.h b/common/download.h
index 307ee56ecb1f3..25bef32f17d4e 100644
--- a/common/download.h
+++ b/common/download.h
@@ -10,9 +10,13 @@ struct common_params_model;
 
 struct common_cached_model_info {
     std::string manifest_path;
-    std::string name; // note: this is not "repo", slashes are replaced with underscores
+    std::string user;
+    std::string repo;
     std::string tag;
     size_t      size = 0; // GGUF size in bytes
+    std::string to_string() const {
+        return user + "/" + repo + ":" + tag;
+    }
 };
 
 struct common_hf_file_res {

From e40850a625c5208bf84b4d21278c35cc2c068e3d Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Fri, 7 Nov 2025 16:07:21 +0100
Subject: [PATCH 3/4] improve naming

---
 common/download.cpp | 8 ++++----
 common/download.h   | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/common/download.cpp b/common/download.cpp
index a2f3cfa7e316a..57308a5c6d536 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -919,10 +919,10 @@ std::vector<common_cached_model_info> common_list_cached_models() {
             string_replace_all(fname, ".json", ""); // remove extension
             auto parts = string_split<std::string>(fname, '=');
             if (parts.size() == 4) {
-                // expect format: manifest=<user>=<repo>=<tag>=<other>
-                model_info.user = parts[1];
-                model_info.repo = parts[2];
-                model_info.tag  = parts[3];
+                // expect format: manifest=<user>=<model>=<tag> (".json" already removed)
+                model_info.user  = parts[1];
+                model_info.model = parts[2];
+                model_info.tag   = parts[3];
             } else {
                 // invalid format
                 continue;
diff --git a/common/download.h b/common/download.h
index 25bef32f17d4e..45a6bd6bba859 100644
--- a/common/download.h
+++ b/common/download.h
@@ -11,11 +11,11 @@ struct common_params_model;
 struct common_cached_model_info {
     std::string manifest_path;
     std::string user;
-    std::string repo;
+    std::string model;
     std::string tag;
     size_t      size = 0; // GGUF size in bytes
     std::string to_string() const {
-        return user + "/" + repo + ":" + tag;
+        return user + "/" + model + ":" + tag;
     }
 };
 

From e0677011600c3d5d83be91c3caea33d396faf754 Mon Sep 17 00:00:00 2001
From: Xuan-Son Nguyen <thichthat@gmail.com>
Date: Sat, 8 Nov 2025 21:51:08 +0100
Subject: [PATCH 4/4] Update common/arg.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 common/arg.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index e84ef25737dd1..a570810281499 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -749,8 +749,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             printf("number of models in cache: %zu\n", models.size());
             for (size_t i = 0; i < models.size(); i++) {
                 auto & model = models[i];
-                auto num = std::to_string(i+1); // so that we can print trailing space
-                printf("%4s. %s\n", num.c_str(), model.to_string().c_str());
+                printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
             }
             exit(0);
         }