Add ETDump event tracer support to LLaMa runner

navsud · facebook-github-bot · commit 0cddd2e0218a · 2025-11-20T16:07:16.000-08:00
Summary:
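Enables op-level profiling of on-device LLMs (od-llms): an optional ETDump event tracer is passed from the LLaMa runner's `main` through `create_llama_runner` and `create_text_llm_runner` into the `Module`, and the collected ETDump data is written to a file at the end of the run.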

**Addresses reviewer feedback:**
- Added `ET_EVENT_TRACER_ENABLED` ifdef guards around ETDump code to ensure normal builds compile without event tracer support
- Replaced hardcoded `/data/local/tmp/etdump.bin` path with configurable `--etdump_path` flag (default: `"etdump.in"`)
- ETDumpGen is created only when compiled with event tracer support enabled

Differential Revision: D87122487
diff --git a/examples/models/llama/main.cpp b/examples/models/llama/main.cpp
@@ -10,9 +10,12 @@
 #include <gflags/gflags.h>
 #include <sstream>
 #include <vector>
-
 #include <executorch/examples/models/llama/runner/runner.h>
 
+#ifdef ET_EVENT_TRACER_ENABLED
+#include <executorch/devtools/etdump/etdump_flatcc.h>
+#endif
+
 #if defined(ET_USE_THREADPOOL)
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
 #include <executorch/extension/threadpool/threadpool.h>
@@ -64,6 +67,11 @@ DEFINE_int32(
 
 DEFINE_bool(warmup, false, "Whether to run a warmup run.");
 
+DEFINE_string(
+    etdump_path,
+    "etdump.in",
+    "If an etdump path is provided, generate an ETDump file at the specified path for profiling purposes.");
+
 // Helper function to parse comma-separated string lists
 std::vector<std::string> parseStringList(const std::string& input) {
   std::vector<std::string> result;
@@ -117,9 +125,26 @@ int32_t main(int32_t argc, char** argv) {
         ->_unsafe_reset_threadpool(num_performant_cores);
   }
 #endif
+
+#ifdef ET_EVENT_TRACER_ENABLED
+  // Create ETDumpGen and get raw pointer reference for later access
+  auto etdump_gen_ptr = std::make_unique<executorch::etdump::ETDumpGen>();
+  executorch::etdump::ETDumpGen* etdump_gen = etdump_gen_ptr.get();
+#endif
+
   // create llama runner
   std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
-      example::create_llama_runner(model_path, tokenizer_path, data_paths);
+      example::create_llama_runner(
+          model_path,
+          tokenizer_path,
+          data_paths,
+          temperature,
+#ifdef ET_EVENT_TRACER_ENABLED
+          std::move(etdump_gen_ptr)
+#else
+          nullptr
+#endif
+      );
 
   if (runner == nullptr) {
     ET_LOG(Error, "Failed to create llama runner");
@@ -157,5 +182,31 @@ int32_t main(int32_t argc, char** argv) {
     return 1;
   }
 
+#ifdef ET_EVENT_TRACER_ENABLED
+  // Persist the profiling events gathered during the run to FLAGS_etdump_path.
+  // Failures are logged only; the exit code below is unaffected.
+  if (etdump_gen != nullptr) {
+    executorch::etdump::ETDumpResult result = etdump_gen->get_etdump_data();
+    if (result.buf != nullptr && result.size > 0) {
+      // The dump is raw bytes, so open the file in binary mode ("wb") to avoid
+      // newline translation on platforms that distinguish text streams.
+      FILE* f = fopen(FLAGS_etdump_path.c_str(), "wb");
+      if (f == nullptr) {
+        ET_LOG(Error, "Failed to open etdump file at path: %s", FLAGS_etdump_path.c_str());
+      } else {
+        const size_t written = fwrite(result.buf, 1, result.size, f);
+        // fclose() flushes buffered data, so its status matters as well.
+        const bool closed = (fclose(f) == 0);
+        if (written == result.size && closed) {
+          ET_LOG(Info, "ETDump file written to: %s", FLAGS_etdump_path.c_str());
+        } else {
+          ET_LOG(Error, "Failed to write etdump file at path: %s", FLAGS_etdump_path.c_str());
+        }
+      }
+      free(result.buf);
+    }
+  }
+#endif
+
   return 0;
 }
diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp
@@ -36,22 +36,26 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
-    float temperature) {
+    float temperature,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
+  ) {
   if (data_path.has_value()) {
     std::vector<std::string> data_files;
     data_files.push_back(data_path.value());
     return create_llama_runner(
-        model_path, tokenizer_path, std::move(data_files), temperature);
+        model_path, tokenizer_path, std::move(data_files), temperature, std::move(event_tracer));
   }
   return create_llama_runner(
-      model_path, tokenizer_path, std::vector<std::string>(), temperature);
+      model_path, tokenizer_path, std::vector<std::string>(), temperature, std::move(event_tracer));
 }
 
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::vector<std::string> data_files,
-    float temperature) {
+    float temperature,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
+) {
   ET_LOG(
       Info,
       "Creating LLaMa runner: model_path=%s, tokenizer_path=%s",
@@ -70,7 +74,7 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     return nullptr;
   }
   return llm::create_text_llm_runner(
-      model_path, std::move(tokenizer), data_files);
+      model_path, std::move(tokenizer), data_files, temperature, std::move(event_tracer));
 }
 
 } // namespace example
diff --git a/examples/models/llama/runner/runner.h b/examples/models/llama/runner/runner.h
@@ -28,13 +28,17 @@ std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::optional<const std::string> data_path,
-    float temperature = -1.0f);
+    float temperature = -1.0f,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
+);
 
 std::unique_ptr<llm::TextLLMRunner> create_llama_runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
     std::vector<std::string> data_files = {},
-    float temperature = -1.0f);
+    float temperature = -1.0f,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
+);
 
 std::unique_ptr<tokenizers::Tokenizer> load_llama_tokenizer(
     const std::string& tokenizer_path,
diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl
@@ -28,6 +28,9 @@ def define_common_targets():
             exported_headers = [
                 "runner.h",
             ],
+            deps = [
+                "//executorch/devtools/etdump:etdump_flatcc",
+            ],
             preprocessor_flags = [
                 "-DUSE_ATEN_LIB",
             ] if aten else [],
diff --git a/examples/models/llama/targets.bzl b/examples/models/llama/targets.bzl
@@ -19,6 +19,7 @@ def define_common_targets():
                     "//executorch/extension/evalue_util:print_evalue",
                     "//executorch/extension/threadpool:threadpool",
                     "//executorch/extension/threadpool:cpuinfo_utils",
+                    "//executorch/devtools/etdump:etdump_flatcc" + aten_suffix,
                 ],
                 external_deps = [
                     "gflags",
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
@@ -200,7 +200,9 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files,
-    float temperature) {
+    float temperature,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer
+) {
   // Sanity check tokenizer
   if (!tokenizer || !tokenizer->is_loaded()) {
     ET_LOG(Error, "Tokenizer is null or not loaded");
@@ -211,9 +213,9 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
   std::unique_ptr<Module> module;
   if (data_files.size() > 0) {
     module = std::make_unique<Module>(
-        model_path, data_files, Module::LoadMode::File);
+        model_path, data_files, Module::LoadMode::File, std::move(event_tracer));
   } else {
-    module = std::make_unique<Module>(model_path, Module::LoadMode::File);
+    module = std::make_unique<Module>(model_path, Module::LoadMode::File, std::move(event_tracer));
   }
 
   // Get metadata from Module
diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h
@@ -123,7 +123,9 @@ ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
     const std::string& model_path,
     std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
     std::vector<std::string> data_files = {},
-    float temperature = -1.0f);
+    float temperature = -1.0f,
+    std::unique_ptr<::executorch::runtime::EventTracer> event_tracer = nullptr
+);
 
 /**
  * @brief Creates a MultimodalRunner instance with dependency injection