46 changes: 44 additions & 2 deletions examples/models/llama/main.cpp
@@ -10,9 +10,12 @@
#include <gflags/gflags.h>
#include <sstream>
#include <vector>

#include <executorch/examples/models/llama/runner/runner.h>

#ifdef ET_EVENT_TRACER_ENABLED
#include <executorch/devtools/etdump/etdump_flatcc.h>
#endif

#if defined(ET_USE_THREADPOOL)
#include <executorch/extension/threadpool/cpuinfo_utils.h>
#include <executorch/extension/threadpool/threadpool.h>
@@ -64,6 +67,11 @@ DEFINE_int32(

DEFINE_bool(warmup, false, "Whether to run a warmup run.");

DEFINE_string(
etdump_path,
"etdump.in",
"If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");

// Helper function to parse comma-separated string lists
std::vector<std::string> parseStringList(const std::string& input) {
std::vector<std::string> result;
@@ -117,9 +125,26 @@ int32_t main(int32_t argc, char** argv) {
->_unsafe_reset_threadpool(num_performant_cores);
}
#endif

#ifdef ET_EVENT_TRACER_ENABLED
// Create ETDumpGen and get raw pointer reference for later access
auto etdump_gen_ptr = std::make_unique<executorch::etdump::ETDumpGen>();
executorch::etdump::ETDumpGen* etdump_gen = etdump_gen_ptr.get();
#endif

// create llama runner
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
example::create_llama_runner(model_path, tokenizer_path, data_paths);
example::create_llama_runner(
model_path,
tokenizer_path,
data_paths,
temperature,
#ifdef ET_EVENT_TRACER_ENABLED
std::move(etdump_gen_ptr)
#else
nullptr
#endif
);

if (runner == nullptr) {
ET_LOG(Error, "Failed to create llama runner");
@@ -157,5 +182,22 @@ int32_t main(int32_t argc, char** argv) {
return 1;
}

#ifdef ET_EVENT_TRACER_ENABLED
if (etdump_gen != nullptr) {
executorch::etdump::ETDumpResult result = etdump_gen->get_etdump_data();
if (result.buf != nullptr && result.size > 0) {
FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+");
if (f == nullptr) {
ET_LOG(Error, "Failed to open etdump file at path: %s", FLAGS_etdump_path.c_str());
} else {
fwrite((uint8_t*)result.buf, 1, result.size, f);
fclose(f);
ET_LOG(Info, "ETDump file written to: %s", FLAGS_etdump_path.c_str());
}
free(result.buf);
}
}
#endif

return 0;
}
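For reference, the full round trip that this main.cpp change implements can be reduced to the sketch below: construct an ETDumpGen, hand ownership to the runner, and serialize the collected trace after generation. The sketch is not part of the diff; the model, tokenizer, and output paths are placeholders, and it assumes a build with ET_EVENT_TRACER_ENABLED so the ETDump headers and the new runner overload are available.

#include <cstdio>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>

#include <executorch/devtools/etdump/etdump_flatcc.h>
#include <executorch/examples/models/llama/runner/runner.h>

int run_with_etdump() {
  // The runner takes ownership of the tracer, so keep a raw pointer
  // for reading the trace back after generation.
  auto etdump_gen = std::make_unique<executorch::etdump::ETDumpGen>();
  executorch::etdump::ETDumpGen* tracer = etdump_gen.get();

  auto runner = example::create_llama_runner(
      "llama.pte",                 // model_path (placeholder)
      "tokenizer.model",           // tokenizer_path (placeholder)
      std::vector<std::string>{},  // data_files
      /*temperature=*/0.8f,
      std::move(etdump_gen));
  if (runner == nullptr) {
    return 1;
  }

  // ... runner->generate(...) as in main.cpp above ...

  // Serialize the collected events; the caller owns and must free the buffer.
  executorch::etdump::ETDumpResult result = tracer->get_etdump_data();
  if (result.buf != nullptr && result.size > 0) {
    FILE* f = fopen("llama_trace.etdp", "wb");
    if (f != nullptr) {
      fwrite(result.buf, 1, result.size, f);
      fclose(f);
    }
    free(result.buf);
  }
  return 0;
}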
14 changes: 9 additions & 5 deletions examples/models/llama/runner/runner.cpp
@@ -36,22 +36,26 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature) {
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
) {
if (data_path.has_value()) {
std::vector<std::string> data_files;
data_files.push_back(data_path.value());
return create_llama_runner(
model_path, tokenizer_path, std::move(data_files), temperature);
model_path, tokenizer_path, std::move(data_files), temperature, std::move(event_tracer));
}
return create_llama_runner(
model_path, tokenizer_path, std::vector<std::string>(), temperature);
model_path, tokenizer_path, std::vector<std::string>(), temperature, std::move(event_tracer));
}

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files,
float temperature) {
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
) {
ET_LOG(
Info,
"Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -70,7 +74,7 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
return nullptr;
}
return llm::create_text_llm_runner(
model_path, std::move(tokenizer), data_files);
model_path, std::move(tokenizer), data_files, temperature, std::move(event_tracer));
}

} // namespace example
8 changes: 6 additions & 2 deletions examples/models/llama/runner/runner.h
@@ -28,13 +28,17 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::optional<const std::string> data_path,
float temperature = -1.0f);
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
);

std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
const std::string& model_path,
const std::string& tokenizer_path,
std::vector<std::string> data_files = {},
float temperature = -1.0f);
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
);

std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
const std::string& tokenizer_path,
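Since the new event_tracer parameter defaults to nullptr (alongside the existing data_files and temperature defaults), callers that do not need profiling keep compiling unchanged; profiling is strictly opt-in. An illustrative pair of call sites, with placeholder paths and assuming the ETDump header is available:

// Unchanged call site: no tracer, default temperature and data files.
auto plain_runner = example::create_llama_runner("llama.pte", "tokenizer.model");

// Opt-in profiling: the same call with an ETDumpGen supplied as the EventTracer.
auto traced_runner = example::create_llama_runner(
    "llama.pte",
    "tokenizer.model",
    std::vector<std::string>{},
    /*temperature=*/-1.0f,
    std::make_unique<executorch::etdump::ETDumpGen>());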
3 changes: 3 additions & 0 deletions examples/models/llama/runner/targets.bzl
@@ -28,6 +28,9 @@ def define_common_targets():
exported_headers = [
"runner.h",
],
deps = [
"//executorch/devtools/etdump:etdump_flatcc",
],
preprocessor_flags = [
"-DUSE_ATEN_LIB",
] if aten else [],
1 change: 1 addition & 0 deletions examples/models/llama/targets.bzl
@@ -19,6 +19,7 @@ def define_common_targets():
"//executorch/extension/evalue_util:print_evalue",
"//executorch/extension/threadpool:threadpool",
"//executorch/extension/threadpool:cpuinfo_utils",
"//executorch/devtools/etdump:etdump_flatcc" + aten_suffix,
],
external_deps = [
"gflags",
8 changes: 5 additions & 3 deletions extension/llm/runner/llm_runner_helper.cpp
@@ -200,7 +200,9 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files,
float temperature) {
float temperature,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
) {
// Sanity check tokenizer
if (!tokenizer || !tokenizer->is_loaded()) {
ET_LOG(Error, "Tokenizer is null or not loaded");
@@ -211,9 +213,9 @@
std::unique_ptr<Module> module;
if (data_files.size() > 0) {
module = std::make_unique<Module>(
model_path, data_files, Module::LoadMode::File);
model_path, data_files, Module::LoadMode::File, std::move(event_tracer));
} else {
module = std::make_unique<Module>(model_path, Module::LoadMode::File);
module = std::make_unique<Module>(model_path, Module::LoadMode::File, std::move(event_tracer));
}

// Get metadata from Module
4 changes: 3 additions & 1 deletion extension/llm/runner/llm_runner_helper.h
@@ -123,7 +123,9 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::vector<std::string> data_files = {},
float temperature = -1.0f);
float temperature = -1.0f,
std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
);

/**
* @brief Creates a MultimodalRunner instance with dependency injection
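The same opt-in applies one level down: create_text_llm_runner accepts any ::executorch::runtime::EventTracer, and ETDumpGen is simply one implementation of that interface that ends up owned by the Module. A minimal sketch, not part of the diff, with the include path and namespace alias assumed from the surrounding files:

#include <memory>
#include <string>
#include <vector>

#include <executorch/devtools/etdump/etdump_flatcc.h>
#include <executorch/extension/llm/runner/llm_runner_helper.h>

namespace llm = ::executorch::extension::llm;

// Build a runner that records an ETDump trace; the caller keeps a raw
// pointer to the tracer so the trace can be read back after generation.
std::unique_ptr<llm::TextLLMRunner> make_traced_runner(
    const std::string& model_path,
    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
    executorch::etdump::ETDumpGen** out_tracer) {
  auto tracer = std::make_unique<executorch::etdump::ETDumpGen>();
  *out_tracer = tracer.get();
  return llm::create_text_llm_runner(
      model_path,
      std::move(tokenizer),
      /*data_files=*/{},
      /*temperature=*/-1.0f,
      std::move(tracer));
}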