From a00cefbe2f5d87b485f5baa700e6a31f07189715 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 7 Nov 2025 13:06:59 +0100 Subject: [PATCH 1/4] arg: add --cache-list argument to list cached models --- common/arg.cpp | 15 +++++++++++++++ common/common.cpp | 33 +++++++++++++++++++++++++++++++++ common/common.h | 7 +++++++ common/download.cpp | 38 +++++++++++++++++++++++++++++++++----- common/download.h | 18 ++++++++++++++---- 5 files changed, 102 insertions(+), 9 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 5597de121c132..e5b587a85652d 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -740,6 +740,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex exit(0); } )); + add_opt(common_arg( + {"-cl", "--cache-list"}, + "show list of models in cache", + [](common_params &) { + printf("model cache directory: %s\n", fs_get_cache_directory().c_str()); + auto models = common_list_cached_models(); + printf("number of models in cache: %zu\n", models.size()); + for (size_t i = 0; i < models.size(); i++) { + auto & model = models[i]; + auto num = std::to_string(i+1); // so that we can print trailing space + printf("%4s. 
%-50s tag: %s\n", num.c_str(), model.name.c_str(), model.tag.c_str()); + } + exit(0); + } + )); add_opt(common_arg( {"--completion-bash"}, "print source-able bash completion script for llama.cpp", diff --git a/common/common.cpp b/common/common.cpp index b0591e84b0668..a8d709ab1d050 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) { return cache_directory + filename; } +std::vector<common_file_info> fs_list_files(const std::string & path) { + std::vector<common_file_info> files; + if (path.empty()) return files; + + std::filesystem::path dir(path); + if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { + return files; + } + + for (const auto & entry : std::filesystem::directory_iterator(dir)) { + try { + // Only include regular files (skip directories) + const auto & p = entry.path(); + if (std::filesystem::is_regular_file(p)) { + common_file_info info; + info.path = p.string(); + info.name = p.filename().string(); + try { + info.size = static_cast<size_t>(std::filesystem::file_size(p)); + } catch (const std::filesystem::filesystem_error &) { + info.size = 0; + } + files.push_back(std::move(info)); + } + } catch (const std::filesystem::filesystem_error &) { + // skip entries we cannot inspect + continue; + } + } + + return files; +} + // // Model utils diff --git a/common/common.h b/common/common.h index 54b7849b17448..8540725aaa476 100644 --- a/common/common.h +++ b/common/common.h @@ -611,6 +611,13 @@ bool fs_create_directory_with_parents(const std::string & path); std::string fs_get_cache_directory(); std::string fs_get_cache_file(const std::string & filename); +struct common_file_info { + std::string path; + std::string name; + size_t size = 0; // in bytes +}; +std::vector<common_file_info> fs_list_files(const std::string & path); + // // Model utils // diff --git a/common/download.cpp b/common/download.cpp index 02d75fc0d0958..7a7aaeebb0071 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -50,6 +50,14 
@@ using json = nlohmann::ordered_json; // downloader // +static std::string get_manifest_path(const std::string & repo, const std::string & tag) { + // we use "=" to avoid clashing with other component, while still being allowed on windows + std::string fname = "manifest=" + repo + "=" + tag + ".json"; + string_replace_all(fname, "/", "_"); + string_replace_all(fname, "\\", "_"); + return fs_get_cache_file(fname); +} + static std::string read_file(const std::string & fname) { std::ifstream file(fname); if (!file) { @@ -829,17 +837,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response // User-Agent header is already set in common_remote_get_content, no need to set it here - // we use "=" to avoid clashing with other component, while still being allowed on windows - std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json"; - string_replace_all(cached_response_fname, "/", "_"); - std::string cached_response_path = fs_get_cache_file(cached_response_fname); - // make the request common_remote_params params; params.headers = headers; long res_code = 0; std::string res_str; bool use_cache = false; + std::string cached_response_path = get_manifest_path(hf_repo, tag); if (!offline) { try { auto res = common_remote_get_content(url, params); @@ -895,6 +899,29 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons return { hf_repo, ggufFile, mmprojFile }; } +std::vector<common_cached_model_info> common_list_cached_models() { + std::vector<common_cached_model_info> models; + const std::string cache_dir = fs_get_cache_directory(); + const std::vector<common_file_info> files = fs_list_files(cache_dir); + for (const auto & file : files) { + if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) { + common_cached_model_info model_info; + model_info.manifest_path = file.path; + std::string fname = file.name; + string_replace_all(fname, 
".json", ""); // remove extension + auto parts = string_split(fname, '='); + if (parts.size() != 3) { + continue; + } + model_info.name = parts[1]; + model_info.tag = parts[2]; + model_info.size = 0; // TODO: get GGUF size, not manifest size + models.push_back(model_info); + } + } + return models; +} + // // Docker registry functions // @@ -959,6 +986,7 @@ std::string common_docker_resolve_model(const std::string & docker) { std::string token = common_docker_get_token(repo); // Get authentication token // Get manifest + // TODO: cache the manifest response so that it appears in the model list const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo; std::string manifest_url = url_prefix + "/manifests/" + tag; common_remote_params manifest_params; diff --git a/common/download.h b/common/download.h index ddf36155ef818..307ee56ecb1f3 100644 --- a/common/download.h +++ b/common/download.h @@ -8,16 +8,19 @@ struct common_params_model; // download functionalities // +struct common_cached_model_info { + std::string manifest_path; + std::string name; // note: this is not "repo", slashes are replaced with underscores + std::string tag; + size_t size = 0; // GGUF size in bytes +}; + struct common_hf_file_res { std::string repo; // repo name with ":tag" removed std::string ggufFile; std::string mmprojFile; }; -// resolve and download model from Docker registry -// return local path to downloaded model file -std::string common_docker_resolve_model(const std::string & docker); - /** * Allow getting the HF file from the HF repo with tag (like ollama), for example: * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 @@ -39,3 +42,10 @@ bool common_download_model( const common_params_model & model, const std::string & bearer_token, bool offline); + +// returns list of cached models +std::vector<common_cached_model_info> common_list_cached_models(); + +// resolve and download model from Docker registry +// return local path to downloaded model file +std::string common_docker_resolve_model(const 
std::string & docker); From 12463c4f25a06cd7492b80fcf63fb44bbbbc25ca Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 7 Nov 2025 16:05:26 +0100 Subject: [PATCH 2/4] new manifest naming format --- common/arg.cpp | 2 +- common/download.cpp | 22 +++++++++++++++++----- common/download.h | 6 +++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index e5b587a85652d..e84ef25737dd1 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -750,7 +750,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex for (size_t i = 0; i < models.size(); i++) { auto & model = models[i]; auto num = std::to_string(i+1); // so that we can print trailing space - printf("%4s. %-50s tag: %s\n", num.c_str(), model.name.c_str(), model.tag.c_str()); + printf("%4s. %s\n", num.c_str(), model.to_string().c_str()); } exit(0); } diff --git a/common/download.cpp b/common/download.cpp index 7a7aaeebb0071..a2f3cfa7e316a 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -50,11 +50,19 @@ using json = nlohmann::ordered_json; // downloader // +// validate repo name format: owner/repo +static bool validate_repo_name(const std::string & repo) { + static const std::regex repo_regex(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)"); + return std::regex_match(repo, repo_regex); +} + static std::string get_manifest_path(const std::string & repo, const std::string & tag) { // we use "=" to avoid clashing with other component, while still being allowed on windows std::string fname = "manifest=" + repo + "=" + tag + ".json"; - string_replace_all(fname, "/", "_"); - string_replace_all(fname, "\\", "_"); + if (!validate_repo_name(repo)) { + throw std::runtime_error("error: repo name must be in the format 'owner/repo'"); + } + string_replace_all(fname, "/", "="); return fs_get_cache_file(fname); } @@ -910,11 +918,15 @@ std::vector<common_cached_model_info> common_list_cached_models() { std::string fname = file.name; string_replace_all(fname, ".json", ""); // 
remove extension auto parts = string_split(fname, '='); - if (parts.size() != 3) { + if (parts.size() == 4) { + // expect format: manifest=<user>=<repo>=<tag>=<other> + model_info.user = parts[1]; + model_info.repo = parts[2]; + model_info.tag = parts[3]; + } else { + // invalid format continue; } - model_info.name = parts[1]; - model_info.tag = parts[2]; model_info.size = 0; // TODO: get GGUF size, not manifest size models.push_back(model_info); } diff --git a/common/download.h b/common/download.h index 307ee56ecb1f3..25bef32f17d4e 100644 --- a/common/download.h +++ b/common/download.h @@ -10,9 +10,13 @@ struct common_params_model; struct common_cached_model_info { std::string manifest_path; - std::string name; // note: this is not "repo", slashes are replaced with underscores + std::string user; + std::string repo; std::string tag; size_t size = 0; // GGUF size in bytes + std::string to_string() const { + return user + "/" + repo + ":" + tag; + } }; struct common_hf_file_res { From e40850a625c5208bf84b4d21278c35cc2c068e3d Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 7 Nov 2025 16:07:21 +0100 Subject: [PATCH 3/4] improve naming --- common/download.cpp | 8 ++++---- common/download.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/common/download.cpp b/common/download.cpp index a2f3cfa7e316a..57308a5c6d536 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -919,10 +919,10 @@ std::vector<common_cached_model_info> common_list_cached_models() { string_replace_all(fname, ".json", ""); // remove extension auto parts = string_split(fname, '='); if (parts.size() == 4) { - // expect format: manifest=<user>=<repo>=<tag>=<other> - model_info.user = parts[1]; - model_info.repo = parts[2]; - model_info.tag = parts[3]; + // expect format: manifest=<user>=<model>=<tag>=<other> + model_info.user = parts[1]; + model_info.model = parts[2]; + model_info.tag = parts[3]; } else { // invalid format continue; diff --git a/common/download.h b/common/download.h index 25bef32f17d4e..45a6bd6bba859 100644 --- a/common/download.h +++ 
b/common/download.h @@ -11,11 +11,11 @@ struct common_params_model; struct common_cached_model_info { std::string manifest_path; std::string user; - std::string repo; + std::string model; std::string tag; size_t size = 0; // GGUF size in bytes std::string to_string() const { - return user + "/" + repo + ":" + tag; + return user + "/" + model + ":" + tag; } }; From e0677011600c3d5d83be91c3caea33d396faf754 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Sat, 8 Nov 2025 21:51:08 +0100 Subject: [PATCH 4/4] Update common/arg.cpp Co-authored-by: Georgi Gerganov --- common/arg.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index e84ef25737dd1..a570810281499 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -749,8 +749,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex printf("number of models in cache: %zu\n", models.size()); for (size_t i = 0; i < models.size(); i++) { auto & model = models[i]; - auto num = std::to_string(i+1); // so that we can print trailing space - printf("%4s. %s\n", num.c_str(), model.to_string().c_str()); + printf("%4d. %s\n", (int) i + 1, model.to_string().c_str()); } exit(0); }