From bf04d28fffecfa582294dd04d4681864305b127b Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 03:24:59 +0800 Subject: [PATCH 01/25] Use glob ignore --- .gitignore | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 26a46f23f62..27be53151fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,8 @@ .hypothesis buck-out/ cmake-out/ -cmake-android-out/ -cmake-out-android/ -cmake-ios-out/ +cmake-*-out/ +cmake-out-*/ ethos-u-scratch/ executorch.egg-info pip-out/ From e58042c09b7b0e2a5ed78f27eeb44d08c268b3c9 Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 03:33:57 +0800 Subject: [PATCH 02/25] Support Windows --- backends/qualcomm/CMakeLists.txt | 2 +- backends/qualcomm/aot/python/CMakeLists.txt | 3 + backends/qualcomm/runtime/SharedBuffer.cpp | 11 + backends/qualcomm/runtime/SharedBuffer.h | 2 +- backends/qualcomm/runtime/Utils.cpp | 8 + .../qualcomm/runtime/backends/CMakeLists.txt | 11 +- .../runtime/backends/QnnBackendFactory.cpp | 4 + .../runtime/backends/QnnFunctionInterface.h | 5 + .../runtime/backends/QnnImplementation.cpp | 49 +++++ .../htpbackend/HtpContextCustomConfig.h | 2 +- build/executorch-config.cmake | 7 +- examples/models/llama2/runner/util.h | 8 + extension/data_loader/mmap_data_loader.cpp | 196 +++++++++++++++++- extension/data_loader/mmap_data_loader.h | 31 +++ runtime/backend/interface.cpp | 79 ++++++- runtime/kernel/operator_registry.cpp | 69 +++++- runtime/platform/system.h | 14 ++ runtime/platform/targets.bzl | 1 + 18 files changed, 479 insertions(+), 23 deletions(-) diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index a840ab0bb92..77b43e96288 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -223,7 +223,7 @@ add_subdirectory( install(TARGETS qnn_executorch_backend DESTINATION lib) # QNN pybind -if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64") +if(EXECUTORCH_BUILD_PYBIND) add_subdirectory( 
${EXECUTORCH_SOURCE_DIR}/third-party/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11 diff --git a/backends/qualcomm/aot/python/CMakeLists.txt b/backends/qualcomm/aot/python/CMakeLists.txt index 337cfae1776..f4ce70dc314 100644 --- a/backends/qualcomm/aot/python/CMakeLists.txt +++ b/backends/qualcomm/aot/python/CMakeLists.txt @@ -15,3 +15,6 @@ target_sources( PyQnnWrapperAdaptor PUBLIC ${CMAKE_CURRENT_LIST_DIR}/PyQnnWrapperAdaptor.cpp ${CMAKE_CURRENT_LIST_DIR}/PyQnnWrapperAdaptor.h ) + +target_compile_options(PyQnnManagerAdaptor PRIVATE -fexceptions) +target_compile_options(PyQnnWrapperAdaptor PRIVATE -fexceptions) diff --git a/backends/qualcomm/runtime/SharedBuffer.cpp b/backends/qualcomm/runtime/SharedBuffer.cpp index 423c5d63723..0a3eb874104 100644 --- a/backends/qualcomm/runtime/SharedBuffer.cpp +++ b/backends/qualcomm/runtime/SharedBuffer.cpp @@ -5,7 +5,9 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ +#ifdef __ANDROID__ #include +#endif #include #include @@ -99,6 +101,10 @@ bool SharedBuffer::IsAllocated(void* buf) { } Error SharedBuffer::Load() { +#ifndef __ANDROID__ + QNN_EXECUTORCH_LOG_ERROR("Shared buffer is not supported on this platform."); + return Error::Internal; +#else // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/ // and /vendor/lib64/ respectively. lib_cdsp_rpc_ = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL); @@ -121,15 +127,20 @@ Error SharedBuffer::Load() { return Error::Internal; } return Error::Ok; +#endif } Error SharedBuffer::UnLoad() { +#ifndef __ANDROID__ + return Error::Ok; +#else if (dlclose(lib_cdsp_rpc_) != 0) { QNN_EXECUTORCH_LOG_ERROR( "Unable to close shared buffer. 
dlerror(): %s", dlerror()); return Error::Internal; }; return Error::Ok; +#endif } } // namespace qnn } // namespace executor diff --git a/backends/qualcomm/runtime/SharedBuffer.h b/backends/qualcomm/runtime/SharedBuffer.h index 1803e8af879..9ee3e4b3a23 100644 --- a/backends/qualcomm/runtime/SharedBuffer.h +++ b/backends/qualcomm/runtime/SharedBuffer.h @@ -55,7 +55,7 @@ class SharedBuffer final { // Pointer to the dlopen'd libcdsprpc.so shared library which contains // rpcmem_alloc, rpcmem_free, rpcmem_to_fd APIs - void* lib_cdsp_rpc_; + [[maybe_unused]] void* lib_cdsp_rpc_; // Function pointer to rpcmem_alloc RpcMemAllocFn_t rpc_mem_alloc_; // Function pointer to rpcmem_free diff --git a/backends/qualcomm/runtime/Utils.cpp b/backends/qualcomm/runtime/Utils.cpp index c049d3720ee..5fe46fe6155 100644 --- a/backends/qualcomm/runtime/Utils.cpp +++ b/backends/qualcomm/runtime/Utils.cpp @@ -7,7 +7,11 @@ */ #include #include +#ifdef _WIN32 +#include +#else #include +#endif namespace torch { namespace executor { namespace qnn { @@ -24,7 +28,11 @@ void CreateDirectory(const std::string& path) { return; } CreateDirectory(subdir); +#ifdef _WIN32 + int mkdir_err = _mkdir(subdir.c_str()); +#else int mkdir_err = mkdir(subdir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO); +#endif if (mkdir_err != 0 && errno != EEXIST) { std::string err_msg = "Failed to create " + subdir + " folder\n"; QNN_EXECUTORCH_LOG_ERROR(err_msg.c_str()); diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index e173f08af08..e8971024a2d 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -44,8 +44,17 @@ target_sources( ) # qnn_device +set(BACKEND_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +if(BACKEND_ARCH STREQUAL "arm64" OR BACKEND_ARCH STREQUAL "aarch64" OR + BACKEND_ARCH STREQUAL "arm64-v8a") + set(BACKEND_ARCH "aarch64") +elseif(BACKEND_ARCH STREQUAL "x86_64") + set(BACKEND_ARCH "x86_64") 
+else() + message(FATAL_ERROR "Unsupported architecture: ${BACKEND_ARCH}") +endif() set(HOST_ARCHITECTURE - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR} + ${CMAKE_CURRENT_LIST_DIR}/htpbackend/${BACKEND_ARCH} ) target_sources( diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index acb95524682..131fb2459e0 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -23,8 +23,12 @@ std::unique_ptr QnnBackendFactory::Create( const std::string skel_library_dir = htp_options->skel_library_dir()->str(); if (!skel_library_dir.empty()) { +#ifdef _WIN32 + _putenv_s("ADSP_LIBRARY_PATH", skel_library_dir.c_str()); +#else setenv( "ADSP_LIBRARY_PATH", skel_library_dir.c_str(), /*overwrite=*/1); +#endif } QNN_EXECUTORCH_LOG_INFO( "skel_library_dir: %s", skel_library_dir.c_str()); diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h index 5ea187ffa52..c78ce7a781b 100644 --- a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h @@ -7,6 +7,11 @@ */ #pragma once +// Dummy fix `interface` definition in Windows +#if defined(__WIN32__) && defined(interface) +#undef interface +#endif + #include "QnnInterface.h" #include "Saver/QnnSaver.h" diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 0f7d45b54b2..5c537319c5c 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -5,7 +5,11 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ +#ifdef _WIN32 +#include +#else #include +#endif #include #include "QnnInterface.h" @@ -14,7 +18,11 @@ namespace executor { namespace qnn { template Fn loadQnnFunction(void* handle, const char* function_name) { +#ifdef _WIN32 + return reinterpret_cast(GetProcAddress(reinterpret_cast(handle), function_name)); +#else return reinterpret_cast(dlsym(handle, function_name)); // NOLINT +#endif } Error QnnImplementation::InitBackend( @@ -54,13 +62,21 @@ Error QnnImplementation::StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config) { Qnn_ErrorHandle_t error = QNN_SUCCESS; +#ifdef _WIN32 + void* lib_handle = LoadLibrary(lib_path.c_str()); +#else void* lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); +#endif if (lib_handle == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Cannot Open QNN library %s, with error: %s", lib_path.c_str(), +#ifdef _WIN32 + GetLastError()); +#else dlerror()); +#endif return Error::Internal; } @@ -72,7 +88,11 @@ Error QnnImplementation::StartBackend( QNN_EXECUTORCH_LOG_ERROR( "QnnImplementation::Load Cannot load symbol " "QnnInterface_getProviders : %s", +#ifdef _WIN32 + GetLastError()); +#else dlerror()); +#endif return Error::Internal; } @@ -120,6 +140,14 @@ Error QnnImplementation::StartBackend( if (loaded_lib_handle_.count(backend_id) > 0) { QNN_EXECUTORCH_LOG_WARN("closing %pK...", loaded_lib_handle_[backend_id]); +#ifdef _WIN32 + if (FreeLibrary(reinterpret_cast(loaded_lib_handle_[backend_id])) == 0) { + QNN_EXECUTORCH_LOG_WARN( + "Sadly, fail to close %pK with error %d", + loaded_lib_handle_[backend_id], + GetLastError()); + } +#else int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); if (dlclose_error != 0) { QNN_EXECUTORCH_LOG_WARN( @@ -127,6 +155,7 @@ Error QnnImplementation::StartBackend( loaded_lib_handle_[backend_id], dlerror()); } +#endif } loaded_lib_handle_[backend_id] = lib_handle; @@ -138,6 +167,15 @@ Error QnnImplementation::StartBackend( lib_path_to_backend_id_.erase(lib_path); 
loaded_backend_.erase(backend_id); +#ifdef _WIN32 + if (FreeLibrary(reinterpret_cast(loaded_lib_handle_[backend_id])) == 0) { + QNN_EXECUTORCH_LOG_WARN( + "fail to close %pK after backend-init " + "failure, with error %d", + loaded_lib_handle_[backend_id], + GetLastError()); + } +#else int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); if (dlclose_error != 0) { QNN_EXECUTORCH_LOG_WARN( @@ -146,6 +184,7 @@ Error QnnImplementation::StartBackend( loaded_lib_handle_[backend_id], dlerror()); } +#endif loaded_lib_handle_.erase(backend_id); return be_init_st; @@ -160,12 +199,22 @@ Error QnnImplementation::TerminateAllBackends() { loaded_backend_.clear(); for (auto& it : loaded_lib_handle_) { +#ifdef _WIN32 + if (FreeLibrary(reinterpret_cast(it.second)) == 0) { + QNN_EXECUTORCH_LOG_ERROR( + "Fail to close QNN backend %d with error %d", + it.first, + GetLastError()); + ret_status = Error::Internal; + } +#else int dlclose_error = dlclose(it.second); if (dlclose_error != 0) { QNN_EXECUTORCH_LOG_ERROR( "Fail to close QNN backend %d with error %s", it.first, dlerror()); ret_status = Error::Internal; } +#endif } loaded_lib_handle_.clear(); lib_path_to_backend_id_.clear(); diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h index 00568bdc327..35fc2d373de 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h @@ -39,7 +39,7 @@ class HtpContextCustomConfig { return htp_context_config_.back().get(); } - const QnnContext* context_; + [[maybe_unused]] const QnnContext* context_; std::vector> htp_context_config_; [[maybe_unused]] const QnnExecuTorchHtpBackendOptions* htp_options_; diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake index 8bfa002dad5..1682a676f11 100644 --- a/build/executorch-config.cmake +++ b/build/executorch-config.cmake @@ 
-76,7 +76,12 @@ foreach(lib ${lib_list}) # keep all libs as static when CMAKE_TOOLCHAIN_IOS is used add_library(${lib} STATIC IMPORTED) endif() - set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION "${${lib_var}}") + if ("${${lib_var}}" MATCHES ".dll.a$") + string(REGEX REPLACE ".dll.a$" ".dll" ${lib_var} "${${lib_var}}") + set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION "${${lib_var}}" IMPORTED_IMPLIB "${${lib_var}}.a") + else() + set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION "${${lib_var}}") + endif() target_include_directories(${lib} INTERFACE ${_root}) endif() endforeach() diff --git a/examples/models/llama2/runner/util.h b/examples/models/llama2/runner/util.h index 5d4792b6414..cfb0aa7d0b4 100644 --- a/examples/models/llama2/runner/util.h +++ b/examples/models/llama2/runner/util.h @@ -8,7 +8,11 @@ #pragma once #include +#ifdef _WIN32 +#include +#else #include +#endif #include namespace torch { @@ -39,9 +43,13 @@ void inline safe_printf(const char* piece) { long inline time_in_ms() { // return time in milliseconds, for benchmarking the model speed +#ifdef _WIN32 + return GetTickCount(); +#else struct timespec time; clock_gettime(CLOCK_REALTIME, &time); return time.tv_sec * 1000 + time.tv_nsec / 1000000; +#endif } } // namespace util diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 1c16251668d..adb8c8e0b79 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -12,11 +12,16 @@ #include #include +#ifdef _WIN32 +#include +#include +#else #include #include #include #include #include +#endif #include #include @@ -54,19 +59,69 @@ Range get_overlapping_pages(uintptr_t offset, size_t size, size_t page_size) { } // namespace +#ifdef _WIN32 +const char* get_last_error_message() { + DWORD errorMessageID = GetLastError(); + if(errorMessageID == 0) { + return ""; //No error message has been recorded + } + LPSTR messageBuffer = nullptr; + 
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); + return messageBuffer; +} +#endif + +#ifdef _WIN32 +class FileHandle { + public: + explicit FileHandle(HANDLE handle) : handle_(handle) {} + ~FileHandle() { + if (handle_ != INVALID_HANDLE_VALUE) { + CloseHandle(handle_); + } + } + HANDLE get() const { return handle_; } + + private: + HANDLE handle_; +}; +#else +class FileHandle { + public: + explicit FileHandle(int fd) : fd_(fd) {} + ~FileHandle() { + if (fd_ >= 0) { + ::close(fd_); + } + } + int get() const { return fd_; } + + private: + int fd_; +}; +#endif + MmapDataLoader::~MmapDataLoader() { // file_name_ can be nullptr if this instance was moved from, but freeing a // null pointer is safe. std::free(const_cast(file_name_)); - // fd_ can be -1 if this instance was moved from, but closing a negative fd is - // safe (though it will return an error). - ::close(fd_); +#ifdef _WIN32 + if (mapping_handle_ != nullptr) { + CloseHandle(mapping_handle_); + } +#endif } Result MmapDataLoader::from( const char* file_name, MmapDataLoader::MlockConfig mlock_config) { // Cache the page size. 
+#ifdef _WIN32 + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + size_t page_size = std::max(system_info.dwPageSize, system_info.dwAllocationGranularity); +#else long page_size = sysconf(_SC_PAGESIZE); if (page_size < 0) { ET_LOG(Error, "Could not get page size: %s (%d)", ::strerror(errno), errno); @@ -76,7 +131,55 @@ Result MmapDataLoader::from( ET_LOG(Error, "Page size 0x%ld is not a power of 2", page_size); return Error::InvalidState; } +#endif + +#ifdef _WIN32 + HANDLE file_handle = CreateFileA( + file_name, + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + nullptr); + if (file_handle == INVALID_HANDLE_VALUE) { + ET_LOG( + Error, + "Failed to open %s: %s", + file_name, + get_last_error_message()); + return Error::AccessFailed; + } + LARGE_INTEGER file_size_li; + if (!GetFileSizeEx(file_handle, &file_size_li)) { + ET_LOG( + Error, + "Could not get length of %s: %s", + file_name, + get_last_error_message()); + CloseHandle(file_handle); + return Error::AccessFailed; + } + size_t file_size = static_cast(file_size_li.QuadPart); + + HANDLE mapping_handle = CreateFileMappingA( + file_handle, + nullptr, + PAGE_READONLY, + 0, + 0, + nullptr); + if (mapping_handle == nullptr) { + ET_LOG( + Error, + "Could not create file mapping for %s: %s", + file_name, + get_last_error_message()); + CloseHandle(file_handle); + return Error::AccessFailed; + } +#else // Use open() instead of fopen() because mmap() needs a file descriptor. int fd = ::open(file_name, O_RDONLY); if (fd < 0) { @@ -103,17 +206,28 @@ Result MmapDataLoader::from( return Error::AccessFailed; } size_t file_size = st.st_size; +#endif // Copy the filename so we can print better debug messages if reads fail. 
const char* file_name_copy = ::strdup(file_name); if (file_name_copy == nullptr) { ET_LOG(Error, "strdup(%s) failed", file_name); +#ifdef _WIN32 + CloseHandle(mapping_handle); + CloseHandle(file_handle); +#else ::close(fd); +#endif return Error::MemoryAllocationFailed; } return MmapDataLoader( +#ifdef _WIN32 + file_handle, + mapping_handle, +#else fd, +#endif file_size, file_name_copy, static_cast(page_size), @@ -127,10 +241,19 @@ namespace { * `context` is actually the OS page size as a uintptr_t. */ void MunmapSegment(void* context, void* data, size_t size) { - const uintptr_t page_size = reinterpret_cast(context); + const size_t page_size = reinterpret_cast(context); - Range range = - get_overlapping_pages(reinterpret_cast(data), size, page_size); + Range range = get_overlapping_pages(reinterpret_cast(data), size, page_size); +#ifdef _WIN32 + if (!UnmapViewOfFile(reinterpret_cast(range.start))) { + ET_LOG( + Error, + "UnmapViewOfFile(0x%zx, %zu) failed: %s", + range.start, + range.size, + get_last_error_message()); + } +#else int ret = ::munmap(reinterpret_cast(range.start), range.size); if (ret < 0) { // Let the user know that something went wrong, but there's nothing we can @@ -143,13 +266,18 @@ void MunmapSegment(void* context, void* data, size_t size) { ::strerror(errno), errno); } +#endif } } // namespace Result MmapDataLoader::Load(size_t offset, size_t size) { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. +#ifdef _WIN32 + file_handle_ != INVALID_HANDLE_VALUE, +#else fd_ >= 0, +#endif InvalidState, "Uninitialized"); ET_CHECK_OR_RETURN_ERROR( @@ -162,7 +290,11 @@ Result MmapDataLoader::Load(size_t offset, size_t size) { file_size_); ET_CHECK_OR_RETURN_ERROR( // Recommended by a lint warning. 
+#ifdef _WIN32 + offset <= std::numeric_limits::max(), +#else offset <= std::numeric_limits::max(), +#endif InvalidArgument, "Offset %zu too large for off_t", offset); @@ -179,6 +311,26 @@ Result MmapDataLoader::Load(size_t offset, size_t size) { // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since // the data is read-only, but use PRIVATE just to further avoid accidentally // modifying the file. +#ifdef _WIN32 + if (range.start + range.size > file_size_) { + range.size = file_size_ - range.start; + } + + void* pages = MapViewOfFile( + mapping_handle_, + FILE_MAP_READ | FILE_MAP_COPY, + static_cast(range.start >> 32), + static_cast(range.start & 0xFFFFFFFF), + range.size); + ET_CHECK_OR_RETURN_ERROR( + pages != nullptr, + AccessFailed, + "Failed to map %s: MapViewOfFile(..., size=%zd, ..., offset=0x%zx): %s", + file_name_, + range.size, + range.start, + get_last_error_message()); +#else void* pages = ::mmap( nullptr, range.size, @@ -194,9 +346,36 @@ Result MmapDataLoader::Load(size_t offset, size_t size) { range.size, fd_, range.start); +#endif if (mlock_config_ == MlockConfig::UseMlock || mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { +#ifdef _WIN32 + if (!VirtualLock(pages, size)) { + if (mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { + ET_LOG( + Debug, + "Ignoring VirtualLock error for file %s (off=0x%zd): " + "VirtualLock(%p, %zu) failed: %s", + file_name_, + offset, + pages, + size, + get_last_error_message()); + } else { + ET_LOG( + Error, + "File %s (off=0x%zd): VirtualLock(%p, %zu) failed: %s", + file_name_, + offset, + pages, + size, + get_last_error_message()); + UnmapViewOfFile(pages); + return Error::NotSupported; + } + } +#else int err = ::mlock(pages, size); if (err < 0) { if (mlock_config_ == MlockConfig::UseMlockIgnoreErrors) { @@ -225,6 +404,7 @@ Result MmapDataLoader::Load(size_t offset, size_t size) { } } // No need to keep track of this. munmap() will unlock as a side effect. 
+#endif } // The requested data is at an offset into the mapped pages. @@ -245,7 +425,11 @@ Result MmapDataLoader::Load(size_t offset, size_t size) { Result MmapDataLoader::size() const { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. +#ifdef _WIN32 + file_handle_ != INVALID_HANDLE_VALUE, +#else fd_ >= 0, +#endif InvalidState, "Uninitialized"); return file_size_; diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index b81a562624f..cf94f4fbc85 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -12,6 +12,12 @@ #include #include +#ifdef _WIN32 +#include +#else +#include +#endif + namespace torch { namespace executor { namespace util { @@ -75,12 +81,22 @@ class MmapDataLoader : public DataLoader { : file_name_(rhs.file_name_), file_size_(rhs.file_size_), page_size_(rhs.page_size_), +#ifdef _WIN32 + file_handle_(rhs.file_handle_), + mapping_handle_(rhs.mapping_handle_), +#else fd_(rhs.fd_), +#endif mlock_config_(rhs.mlock_config_) { rhs.file_name_ = nullptr; rhs.file_size_ = 0; rhs.page_size_ = 0; +#ifdef _WIN32 + rhs.file_handle_ = INVALID_HANDLE_VALUE; + rhs.mapping_handle_ = nullptr; +#else rhs.fd_ = -1; +#endif rhs.mlock_config_ = MlockConfig::NoMlock; } @@ -93,7 +109,12 @@ class MmapDataLoader : public DataLoader { private: MmapDataLoader( +#ifdef _WIN32 + HANDLE file_handle, + HANDLE mapping_handle, +#else int fd, +#endif size_t file_size, const char* file_name, size_t page_size, @@ -101,7 +122,12 @@ class MmapDataLoader : public DataLoader { : file_name_(file_name), file_size_(file_size), page_size_(page_size), +#ifdef _WIN32 + file_handle_(file_handle), + mapping_handle_(mapping_handle), +#else fd_(fd), +#endif mlock_config_(mlock_config) {} // Not safely copyable. @@ -112,7 +138,12 @@ class MmapDataLoader : public DataLoader { const char* file_name_; // String data is owned by the instance. 
size_t file_size_; size_t page_size_; +#ifdef _WIN32 + HANDLE file_handle_; + HANDLE mapping_handle_; +#else int fd_; // Owned by the instance. +#endif MlockConfig mlock_config_; }; diff --git a/runtime/backend/interface.cpp b/runtime/backend/interface.cpp index 3d2aa41a8dd..3ec3af6307f 100644 --- a/runtime/backend/interface.cpp +++ b/runtime/backend/interface.cpp @@ -9,21 +9,86 @@ #include #include -namespace torch { -namespace executor { - -PyTorchBackendInterface::~PyTorchBackendInterface() {} +#ifdef _WIN32 +#include +#include +#include +#define getpid GetCurrentProcessId +#else +#include +#endif // Task t128866626: Remove global static variables. // We want to be able to run multiple Executor instances // and having a global registration isn't a viable solution // in the long term. -BackendRegistry& getBackendRegistry(); -BackendRegistry& getBackendRegistry() { - static BackendRegistry backend_reg; +#ifdef _WIN32 + +#define SHARED_MEMORY_NAME "torch_executor_backend_registry" +static std::shared_ptr backend_reg; + +torch::executor::BackendRegistry& getBackendRegistry() { + if (backend_reg != nullptr) { + return *backend_reg; + } + + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(torch::executor::BackendRegistry), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return *backend_reg; + } + } + + torch::executor::BackendRegistry* registry = (torch::executor::BackendRegistry*) MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + 
sizeof(torch::executor::BackendRegistry) + ); + + if (registry == NULL) { + return *backend_reg; + } + + if (backend_reg == nullptr) { + backend_reg = std::shared_ptr(registry, [](torch::executor::BackendRegistry* ptr) { + UnmapViewOfFile(ptr); + }); + } + + return *backend_reg; +} + +#else + +torch::executor::BackendRegistry& getBackendRegistry(); +torch::executor::BackendRegistry& getBackendRegistry() { + static torch::executor::BackendRegistry backend_reg; return backend_reg; } +#endif + +namespace torch { +namespace executor { + +PyTorchBackendInterface::~PyTorchBackendInterface() {} + PyTorchBackendInterface* get_backend_class(const char* name) { return getBackendRegistry().get_backend_class(name); } diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index 629077ca7e0..02ee9b60ff7 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -14,15 +14,74 @@ #include -namespace torch { -namespace executor { +#ifdef _WIN32 +#include +#include +#include + +#define SHARED_MEMORY_NAME "torch_executor_operator_registry" +static std::shared_ptr operator_reg; + +torch::executor::OperatorRegistry& getOperatorRegistry() { + if (operator_reg != nullptr) { + return *operator_reg; + } + + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(torch::executor::OperatorRegistry), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return *operator_reg; + } + } + + torch::executor::OperatorRegistry* registry = (torch::executor::OperatorRegistry*) 
MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + sizeof(torch::executor::OperatorRegistry) + ); + + if (registry == NULL) { + return *operator_reg; + } + + if (operator_reg == nullptr) { + operator_reg = std::shared_ptr(registry, [](torch::executor::OperatorRegistry* ptr) { + UnmapViewOfFile(ptr); + }); + } + + return *operator_reg; +} -OperatorRegistry& getOperatorRegistry(); -OperatorRegistry& getOperatorRegistry() { - static OperatorRegistry operator_registry; +#else + +torch::executor::OperatorRegistry& getOperatorRegistry(); +torch::executor::OperatorRegistry& getOperatorRegistry() { + static torch::executor::OperatorRegistry operator_registry; return operator_registry; } +#endif + +namespace torch { +namespace executor { + Error register_kernels(const ArrayRef& kernels) { Error success = getOperatorRegistry().register_kernels(kernels); if (success == Error::InvalidArgument || success == Error::Internal) { diff --git a/runtime/platform/system.h b/runtime/platform/system.h index c836e5ff222..56bd8432950 100644 --- a/runtime/platform/system.h +++ b/runtime/platform/system.h @@ -21,6 +21,9 @@ #if defined(ET_USE_LIBDL) #include #endif +#if defined(ET_USE_WINAPI) +#include +#endif static constexpr const char* DYNAMIC_LIBRARY_NOT_SUPPORTED = "NOT_SUPPORTED"; static constexpr const char* DYNAMIC_LIBRARY_NOT_FOUND = "NOT_FOUND"; @@ -41,6 +44,17 @@ inline const char* et_pal_get_shared_library_name(const void* addr) { } else { return DYNAMIC_LIBRARY_NOT_FOUND; } +#endif +#if defined(ET_USE_WINAPI) + HMODULE hModule = NULL; + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + reinterpret_cast(addr), &hModule)) { + char path[MAX_PATH]; + if (GetModuleFileNameA(hModule, path, sizeof(path))) { + return path; + } + } + return DYNAMIC_LIBRARY_NOT_FOUND; #endif return DYNAMIC_LIBRARY_NOT_SUPPORTED; } diff --git a/runtime/platform/targets.bzl 
b/runtime/platform/targets.bzl index a4af1274024..93685e322c6 100644 --- a/runtime/platform/targets.bzl +++ b/runtime/platform/targets.bzl @@ -109,6 +109,7 @@ def define_common_targets(): "DEFAULT": [], "ovr_config//os:linux": ["-DET_USE_LIBDL"], "ovr_config//os:macos": ["-DET_USE_LIBDL"], + "ovr_config//os:windows": ["-DET_USE_WINAPI"], }, ), visibility = [ From 969b61293ceae010d4ce4646d790f9ac88250d36 Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 16:20:19 +0800 Subject: [PATCH 03/25] Fix unknown type --- backends/qualcomm/aot/ir/qcir_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/qualcomm/aot/ir/qcir_utils.cpp b/backends/qualcomm/aot/ir/qcir_utils.cpp index d32c36149ff..f36f1848dab 100755 --- a/backends/qualcomm/aot/ir/qcir_utils.cpp +++ b/backends/qualcomm/aot/ir/qcir_utils.cpp @@ -129,7 +129,7 @@ flatbuffers::Offset ToQuantizeParam( case qcir::QuantizeType::AXIS_SCALE_OFFSET: { size_t len = param.axisScaleOffsetEncoding.numScaleOffsets; axis = param.axisScaleOffsetEncoding.axis; - for (uint i = 0; i < len; ++i) { + for (size_t i = 0; i < len; ++i) { data.emplace_back(qcir::ScaleOffset( param.axisScaleOffsetEncoding.scaleOffset[i].scale, param.axisScaleOffsetEncoding.scaleOffset[i].offset)); From e9f162f4e8a3ba6b5ea5841f3ad28add4e48fd39 Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 16:21:35 +0800 Subject: [PATCH 04/25] Add ignoring files --- .gitignore | 1 + backends/qualcomm/.gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 backends/qualcomm/.gitignore diff --git a/.gitignore b/.gitignore index 27be53151fa..ed5f48432b9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ ethos-u-scratch/ executorch.egg-info pip-out/ __pycache__/ +.python-version # Any exported models and profiling outputs *.pte diff --git a/backends/qualcomm/.gitignore b/backends/qualcomm/.gitignore new file mode 100644 index 00000000000..b2ddb055dcb --- /dev/null +++ b/backends/qualcomm/.gitignore @@ -0,0 +1 
@@ +*_generated.h From 00845eaec90dea5808beda1ac54ceaa9bdb0db7f Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 16:43:07 +0800 Subject: [PATCH 05/25] Fix build for QNN --- .../runtime/backends/QnnSysImplementation.cpp | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/backends/qualcomm/runtime/backends/QnnSysImplementation.cpp b/backends/qualcomm/runtime/backends/QnnSysImplementation.cpp index 519dd867d4a..e471b54881c 100644 --- a/backends/qualcomm/runtime/backends/QnnSysImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnSysImplementation.cpp @@ -6,7 +6,11 @@ * LICENSE file in the root directory of this source tree. */ +#ifdef _WIN32 +#include +#else #include +#endif #include namespace torch { namespace executor { @@ -14,6 +18,28 @@ namespace qnn { Error QnnSystemImplementation::Load() { Qnn_ErrorHandle_t error = QNN_SUCCESS; +#ifdef _WIN32 + HMODULE lib_handle_ = LoadLibrary(lib_path_.c_str()); + if (lib_handle_ == nullptr) { + QNN_EXECUTORCH_LOG_ERROR( + "Cannot Open QNN library %s, with error: %d", + lib_path_.c_str(), + GetLastError()); + return Error::Internal; + } + + auto* get_providers = + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast( + GetProcAddress(lib_handle_, "QnnSystemInterface_getProviders")); + if (get_providers == nullptr) { + QNN_EXECUTORCH_LOG_ERROR( + "QnnSystemImplementation::Load Cannot load symbol " + "QnnSystemInterface_getProviders : %d", + GetLastError()); + return Error::Internal; + } +#else void* lib_handle_ = dlopen(lib_path_.c_str(), RTLD_NOW | RTLD_LOCAL); if (lib_handle_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( @@ -34,6 +60,7 @@ Error QnnSystemImplementation::Load() { dlerror()); return Error::Internal; } +#endif std::uint32_t num_providers; const QnnSystemInterface_t** provider_list = nullptr; @@ -64,12 +91,20 @@ Error QnnSystemImplementation::Unload() { if (lib_handle_ == nullptr) return Error::Ok; +#ifdef _WIN32 + if 
(!FreeLibrary(reinterpret_cast(lib_handle_))) { + QNN_EXECUTORCH_LOG_WARN( + "Failed to close QnnSystem library with error %d", GetLastError()); + return Error::Internal; + } +#else int dlclose_error = dlclose(lib_handle_); if (dlclose_error != 0) { QNN_EXECUTORCH_LOG_WARN( "Failed to close QnnSystem library with error %s", dlerror()); return Error::Internal; } +#endif lib_handle_ = nullptr; From f4a52a58f6f3ffd75ec3ad55f85ad7e334c80444 Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 25 May 2024 16:53:56 +0800 Subject: [PATCH 06/25] Correct impl --- extension/data_loader/mmap_data_loader.cpp | 34 +++------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index adb8c8e0b79..fcde5bf5433 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -72,36 +72,6 @@ const char* get_last_error_message() { } #endif -#ifdef _WIN32 -class FileHandle { - public: - explicit FileHandle(HANDLE handle) : handle_(handle) {} - ~FileHandle() { - if (handle_ != INVALID_HANDLE_VALUE) { - CloseHandle(handle_); - } - } - HANDLE get() const { return handle_; } - - private: - HANDLE handle_; -}; -#else -class FileHandle { - public: - explicit FileHandle(int fd) : fd_(fd) {} - ~FileHandle() { - if (fd_ >= 0) { - ::close(fd_); - } - } - int get() const { return fd_; } - - private: - int fd_; -}; -#endif - MmapDataLoader::~MmapDataLoader() { // file_name_ can be nullptr if this instance was moved from, but freeing a // null pointer is safe. @@ -110,6 +80,10 @@ MmapDataLoader::~MmapDataLoader() { if (mapping_handle_ != nullptr) { CloseHandle(mapping_handle_); } +#else + // fd_ can be -1 if this instance was moved from, but closing a negative fd is + // safe (though it will return an error). 
+ ::close(fd_); #endif } From b2a056af179323eaf46dca77ad3f7866a96e0600 Mon Sep 17 00:00:00 2001 From: Hans Date: Sun, 26 May 2024 03:43:23 +0800 Subject: [PATCH 07/25] Fix not found symbol on Windows DLL --- backends/qualcomm/runtime/QnnManager.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index dc3217fc1c8..fb628da46fc 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -370,14 +370,31 @@ Error QnnManager::Compile( } // namespace qnn } // namespace executor } // namespace torch -void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) { + +extern "C" { + +#ifdef _WIN32 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdll-attribute-on-redeclaration" +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +EXPORT void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) { using torch::executor::qnn::SharedBuffer; void* buffer_ptr = SharedBuffer::GetSharedBufferManager().AllocMem(bytes, alignment); return buffer_ptr; } -void QnnExecuTorchFreeCustomMem(void* buffer_ptr) { +EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr) { using torch::executor::qnn::SharedBuffer; SharedBuffer::GetSharedBufferManager().FreeMem(buffer_ptr); } + +#ifdef _WIN32 +#pragma clang diagnostic pop +#endif + +} // extern "C" From 91f455107f419808c9d1b80d8fb05a6efa852fcf Mon Sep 17 00:00:00 2001 From: Hans Date: Wed, 29 May 2024 19:16:38 +0800 Subject: [PATCH 08/25] Just warning once --- backends/qualcomm/runtime/SharedBuffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/qualcomm/runtime/SharedBuffer.cpp b/backends/qualcomm/runtime/SharedBuffer.cpp index 0a3eb874104..c15f6bc4113 100644 --- a/backends/qualcomm/runtime/SharedBuffer.cpp +++ b/backends/qualcomm/runtime/SharedBuffer.cpp @@ -102,8 +102,8 @@ bool 
SharedBuffer::IsAllocated(void* buf) { Error SharedBuffer::Load() { #ifndef __ANDROID__ - QNN_EXECUTORCH_LOG_ERROR("Shared buffer is not supported on this platform."); - return Error::Internal; + QNN_EXECUTORCH_LOG_WARN("Shared buffer is not supported on this platform."); + return Error::Ok; #else // On Android, 32-bit and 64-bit libcdsprpc.so can be found at /vendor/lib/ // and /vendor/lib64/ respectively. From 3c80ec40e78ba69af14492632812e1da6763e5ba Mon Sep 17 00:00:00 2001 From: Hans Date: Wed, 29 May 2024 18:28:45 +0800 Subject: [PATCH 09/25] Fix build for QNN --- backends/qualcomm/runtime/QnnExecuTorchBackend.cpp | 2 +- examples/qualcomm/executor_runner/qnn_executor_runner.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 77449703c5f..6d6128edc1b 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -26,7 +26,7 @@ Result QnnExecuTorchBackend::init( ArrayRef compile_specs) const { // covert SizedBuffer to qnn ExecuTorch option QnnExecuTorchContextBinary qnn_context_blob; - const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options; + const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr; qnn_context_blob.buffer = const_cast(processed->data()); qnn_context_blob.nbytes = processed->size(); diff --git a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp index 8998ee634e0..505a9596d53 100644 --- a/examples/qualcomm/executor_runner/qnn_executor_runner.cpp +++ b/examples/qualcomm/executor_runner/qnn_executor_runner.cpp @@ -33,6 +33,7 @@ #include #include +#include static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB From 86e2bc63570373c4498e79be05e805d4108e2e58 Mon Sep 17 00:00:00 2001 From: Hans Date: Wed, 29 May 2024 18:28:11 +0800 Subject: [PATCH 10/25] Force 
export `QnnExecuTorchAllocCustomMem`, `QnnExecuTorchFreeCustomMem` --- backends/qualcomm/runtime/QnnManager.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index fb628da46fc..5557767a7b1 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -371,14 +371,12 @@ Error QnnManager::Compile( } // namespace executor } // namespace torch -extern "C" { - #ifdef _WIN32 #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdll-attribute-on-redeclaration" #define EXPORT __declspec(dllexport) #else -#define EXPORT +#define EXPORT __attribute__((visibility("default"))) #endif EXPORT void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) { @@ -396,5 +394,3 @@ EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr) { #ifdef _WIN32 #pragma clang diagnostic pop #endif - -} // extern "C" From e8192ad3b3992f82e65ee5e010a33f6e4252da3e Mon Sep 17 00:00:00 2001 From: Hans Date: Mon, 27 May 2024 18:21:30 +0800 Subject: [PATCH 11/25] Reduce dup-compile for custom kernels --- build/executorch-config.cmake | 1 + examples/models/llama2/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake index 1682a676f11..982bc097a0d 100644 --- a/build/executorch-config.cmake +++ b/build/executorch-config.cmake @@ -55,6 +55,7 @@ set(lib_list optimized_native_cpu_ops_lib quantized_kernels quantized_ops_lib + custom_ops ) foreach(lib ${lib_list}) # Name of the variable which stores result of the find_library search diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt index 5044a5ce9bd..3bf5d97a795 100644 --- a/examples/models/llama2/CMakeLists.txt +++ b/examples/models/llama2/CMakeLists.txt @@ -126,7 +126,7 @@ endif() target_link_options_shared_lib(quantized_ops_lib) list(APPEND link_libraries 
quantized_kernels quantized_ops_lib) -if(EXECUTORCH_BUILD_KERNELS_CUSTOM) +if(EXECUTORCH_BUILD_KERNELS_CUSTOM OR TARGET custom_ops) target_link_options_shared_lib(custom_ops) list(APPEND link_libraries custom_ops) endif() From e99fc7378704c0a494e0f477bebaa5ef724aacca Mon Sep 17 00:00:00 2001 From: Hans Date: Wed, 29 May 2024 20:19:42 +0800 Subject: [PATCH 12/25] Use visibility instead dllexport --- backends/qualcomm/runtime/QnnManager.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index 5557767a7b1..d4be4675d5b 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -371,13 +371,7 @@ Error QnnManager::Compile( } // namespace executor } // namespace torch -#ifdef _WIN32 -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdll-attribute-on-redeclaration" -#define EXPORT __declspec(dllexport) -#else #define EXPORT __attribute__((visibility("default"))) -#endif EXPORT void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment) { using torch::executor::qnn::SharedBuffer; @@ -390,7 +384,3 @@ EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr) { using torch::executor::qnn::SharedBuffer; SharedBuffer::GetSharedBufferManager().FreeMem(buffer_ptr); } - -#ifdef _WIN32 -#pragma clang diagnostic pop -#endif From 4b6189f92f68109655744725a4617fa2479dd65b Mon Sep 17 00:00:00 2001 From: Hans Date: Thu, 30 May 2024 02:05:01 +0800 Subject: [PATCH 13/25] Fix QNN for Windows --- backends/qualcomm/runtime/QnnManager.h | 6 ++++++ backends/qualcomm/runtime/backends/QnnBackendCache.h | 4 ++++ backends/qualcomm/runtime/backends/QnnImplementation.cpp | 7 +++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h index 639d3534de4..091615c38ff 100644 --- a/backends/qualcomm/runtime/QnnManager.h +++ 
b/backends/qualcomm/runtime/QnnManager.h @@ -75,9 +75,15 @@ class QnnManager { private: Error LoadQnnLibrary(); +#ifdef _WIN32 + static constexpr const char* htp_library_name_ = "QnnHtp.dll"; + static constexpr const char* gpu_library_name_ = "QnnGpu.dll"; + static constexpr const char* dsp_library_name_ = "QnnDsp.dll"; +#else static constexpr const char* htp_library_name_ = "libQnnHtp.so"; static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; +#endif QnnExecuTorchContextBinary qnn_context_blob_; std::unique_ptr backend_params_ptr_; diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.h b/backends/qualcomm/runtime/backends/QnnBackendCache.h index ad6d3d0bd7b..9cb77b4043c 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCache.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCache.h @@ -58,7 +58,11 @@ class QnnBackendCache { QnnExecuTorchContextBinary qnn_context_blob_; QnnSystemContext_Handle_t sys_context_handle_{nullptr}; +#ifdef _WIN32 + QnnSystemImplementation qnn_sys_impl_{ "QnnSystem.dll" }; +#else QnnSystemImplementation qnn_sys_impl_{"libQnnSystem.so"}; +#endif std::string graph_name_; std::vector input_tensor_structs_; std::vector output_tensor_structs_; diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 5c537319c5c..71bfc151e66 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -69,12 +69,15 @@ Error QnnImplementation::StartBackend( #endif if (lib_handle == nullptr) { +#ifdef _WIN32 QNN_EXECUTORCH_LOG_ERROR( - "Cannot Open QNN library %s, with error: %s", + "Cannot Open QNN library %s, with error: %d", lib_path.c_str(), -#ifdef _WIN32 GetLastError()); #else + QNN_EXECUTORCH_LOG_ERROR( + "Cannot Open QNN library %s, with error: %s", + lib_path.c_str(), dlerror()); #endif return 
Error::Internal; From b512236ec647abc0589481b200f773142305f8d3 Mon Sep 17 00:00:00 2001 From: Hans Date: Thu, 30 May 2024 02:09:12 +0800 Subject: [PATCH 14/25] Support Windows platform --- build/Utils.cmake | 14 ++ build/extract_sources.py | 24 +++- runtime/platform/target/Windows.cpp | 215 ++++++++++++++++++++++++++++ runtime/platform/targets.bzl | 8 +- 4 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 runtime/platform/target/Windows.cpp diff --git a/build/Utils.cmake b/build/Utils.cmake index 56fc1e104b0..20ba25f7f1b 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -171,11 +171,25 @@ function(extract_sources sources_file) set(executorch_root ${CMAKE_CURRENT_SOURCE_DIR}) endif() + set(TARGET_PLATFORM ${CMAKE_SYSTEM_NAME}) + string(TOLOWER ${TARGET_PLATFORM} TARGET_PLATFORM) + + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR + CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") + set(TARGET_ARCH "aarch64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(TARGET_ARCH "x8664") + else() + set(TARGET_ARCH "default") + endif() + execute_process( COMMAND ${PYTHON_EXECUTABLE} ${executorch_root}/build/extract_sources.py --config=${executorch_root}/build/cmake_deps.toml --out=${sources_file} --buck2=${BUCK2} + --platform=${TARGET_PLATFORM} + --arch=${TARGET_ARCH} OUTPUT_VARIABLE gen_srcs_output ERROR_VARIABLE gen_srcs_error RESULT_VARIABLE gen_srcs_exit_code diff --git a/build/extract_sources.py b/build/extract_sources.py index ce8b3de9812..4ed8a842904 100755 --- a/build/extract_sources.py +++ b/build/extract_sources.py @@ -96,7 +96,13 @@ def __init__( else: self._config[k] = v - def get_sources(self, graph: "Graph", runner: Buck2Runner) -> frozenset[str]: + def get_sources( + self, + graph: "Graph", + runner: Buck2Runner, + platform: str = "default", + arch: str = "default" + ) -> frozenset[str]: if self._state == Target._InitState.READY: return self._sources # Detect cycles. 
@@ -113,7 +119,7 @@ def get_sources(self, graph: "Graph", runner: Buck2Runner) -> frozenset[str]: ) # Get the complete list of source files that this target depends on. - sources: set[str] = set(runner.run(["cquery", query])) + sources: set[str] = set(runner.run(["cquery", query, "--fake-host", platform, "--fake-arch", arch])) # Keep entries that match all of the filters. filters = [re.compile(p) for p in self._config.get("filters", [])] @@ -128,7 +134,7 @@ def get_sources(self, graph: "Graph", runner: Buck2Runner) -> frozenset[str]: # its deps. Remove entries that are already covered by the transitive # set of dependencies. for dep in self._config.get("deps", []): - sources.difference_update(graph.by_name[dep].get_sources(graph, runner)) + sources.difference_update(graph.by_name[dep].get_sources(graph, runner, platform, arch)) self._sources = frozenset(sources) self._state = Target._InitState.READY @@ -173,6 +179,16 @@ def parse_args() -> argparse.Namespace: metavar="file", help="Path to the file to generate.", ) + parser.add_argument( + "--platform", + default="default", + help="Target platform for the build system", + ) + parser.add_argument( + "--arch", + default="default", + help="Target architecture for the build system", + ) return parser.parse_args() @@ -200,7 +216,7 @@ def main(): target_to_srcs: dict[str, list[str]] = {} runner: Buck2Runner = Buck2Runner(args.buck2) for name, target in graph.by_name.items(): - target_to_srcs[name] = sorted(target.get_sources(graph, runner)) + target_to_srcs[name] = sorted(target.get_sources(graph, runner, args.platform, args.arch)) # Generate the requested format. output: bytes diff --git a/runtime/platform/target/Windows.cpp b/runtime/platform/target/Windows.cpp new file mode 100644 index 00000000000..8eb66604082 --- /dev/null +++ b/runtime/platform/target/Windows.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +/** + * @file + * Fallback PAL implementations for Windows systems. + * + * Note that this assumes that the platform defines the symbols used in this + * file (like fprintf()), because this file will still be built even if the + * functions are later overridden. When building for a platform that does not + * provide the necessary symbols, clients can use Minimal.cpp instead, but they + * will need to override all of the functions. + */ + +// This cpp file will provide weak implementations of the symbols declared in +// Platform.h. Client users can strongly define any or all of the functions to +// override them. +#define ET_INTERNAL_PLATFORM_WEAKNESS __ET_WEAK +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#define SHARED_MEMORY_NAME "torch_executor_platform_init_time" + +// The FILE* to write logs to. +#define ET_LOG_OUTPUT_FILE stderr + +/** + * On debug builds, ensure that `et_pal_init` has been called before + * other PAL functions which depend on initialization. + */ +#ifdef NDEBUG + +/** + * Assert that the PAL has been initialized. + */ +#define _ASSERT_PAL_INITIALIZED() ((void)0) + +#else // NDEBUG + +/** + * Assert that the PAL has been initialized. + */ +#define _ASSERT_PAL_INITIALIZED() \ + ({ \ + if (!check_shared_memory()) { \ + fprintf( \ + ET_LOG_OUTPUT_FILE, \ + "ExecuTorch PAL must be initialized before call to %s()", \ + __ET_FUNCTION); \ + fflush(ET_LOG_OUTPUT_FILE); \ + et_pal_abort(); \ + } \ + }) + +#endif // NDEBUG + +/// Start time of the system (used to zero the system timestamp).
+static std::shared_ptr> systemStartTime = nullptr; + +bool check_shared_memory() { + if (systemStartTime != nullptr) { + return true; + } + + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(std::chrono::time_point), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return false; + } + } + + systemStartTime =std::shared_ptr>( + (std::chrono::time_point*) MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + sizeof(std::chrono::time_point) + ) + ); + + if (systemStartTime == nullptr) { + return false; + } + + return true; +} + +/** + * Initialize the platform abstraction layer. + * + * This function should be called before any other function provided by the PAL + * to initialize any global state. Typically overridden by PAL implementer. + */ +void et_pal_init(void) { + if (check_shared_memory()) { + return; + } + *systemStartTime = std::chrono::steady_clock::now(); +} + +/** + * Immediately abort execution, setting the device into an error state, if + * available. + */ +__ET_NORETURN void et_pal_abort(void) { + std::abort(); +} + +/** + * Return a monotonically non-decreasing timestamp in system ticks. + * + * @retval Timestamp value in system ticks. 
+ */ +et_timestamp_t et_pal_current_ticks(void) { + _ASSERT_PAL_INITIALIZED(); + auto systemCurrentTime = std::chrono::steady_clock::now(); + return std::chrono::duration_cast( + systemCurrentTime - *systemStartTime) + .count(); +} + +/** + * Return the conversion rate from system ticks to nanoseconds, as a fraction. + * To convert an interval from system ticks to nanoseconds, multiply the tick + * count by the numerator and then divide by the denominator: + * nanoseconds = ticks * numerator / denominator + * + * @retval The ratio of nanoseconds to system ticks. + */ +et_tick_ratio_t et_pal_ticks_to_ns_multiplier(void) { + // The system tick interval is 1 nanosecond, so the conversion factor is 1. + return {1, 1}; +} + +/** + * Emit a log message via platform output (serial port, console, etc). + * + * @param[in] timestamp Timestamp of the log event in system ticks since boot. + * @param[in] level Severity level of the message. Must be a printable 7-bit + * ASCII uppercase letter. + * @param[in] filename Name of the file that created the log event. + * @param[in] function Name of the function that created the log event. + * @param[in] line Line in the source file where the log event was created. + * @param[in] message Message string to log. + * @param[in] length Message string length. + */ +void et_pal_emit_log_message( + et_timestamp_t timestamp, + et_pal_log_level_t level, + const char* filename, + __ET_UNUSED const char* function, + size_t line, + const char* message, + __ET_UNUSED size_t length) { + _ASSERT_PAL_INITIALIZED(); + + // Not all platforms have ticks == nanoseconds, but this one does. 
+ timestamp /= 1000; // To microseconds + unsigned long int us = timestamp % 1000000; + timestamp /= 1000000; // To seconds + unsigned int sec = timestamp % 60; + timestamp /= 60; // To minutes + unsigned int min = timestamp % 60; + timestamp /= 60; // To hours + unsigned int hour = timestamp; + + // Use a format similar to glog and folly::logging, except: + // - Print time since et_pal_init since we don't have wall time + // - Don't include the thread ID, to avoid adding a threading dependency + // - Add the string "executorch:" to make the logs more searchable + // + // Clients who want to change the format or add other fields can override this + // weak implementation of et_pal_emit_log_message. + fprintf( + ET_LOG_OUTPUT_FILE, + "%c %02u:%02u:%02u.%06lu executorch:%s:%zu] %s\n", + level, + hour, + min, + sec, + us, + filename, + line, + message); + fflush(ET_LOG_OUTPUT_FILE); +} diff --git a/runtime/platform/targets.bzl b/runtime/platform/targets.bzl index 93685e322c6..d03129fe313 100644 --- a/runtime/platform/targets.bzl +++ b/runtime/platform/targets.bzl @@ -6,7 +6,12 @@ def _select_pal(dict_): `executorch.pal_default` build config value. Fails if no corresponding entry exists. 
""" - pal_default = native.read_config("executorch", "pal_default", "posix") + # buck2 check target platform + # check config//os: + if host_info().os.is_windows: + pal_default = native.read_config("executorch", "pal_default", "windows") + else: + pal_default = native.read_config("executorch", "pal_default", "posix") if not pal_default in dict_: fail("Missing key for executorch.pal_default value '{}' in dict '{}'".format(pal_default, dict_)) return dict_[pal_default] @@ -44,6 +49,7 @@ def define_common_targets(): srcs = _select_pal({ "minimal": ["target/Minimal.cpp"], "posix": ["target/Posix.cpp"], + "windows": ["target/Windows.cpp"], }), deps = [ ":pal_interface", From f41acfd0f78f246c5c7e2bad07f589286af6425f Mon Sep 17 00:00:00 2001 From: Hans Date: Thu, 30 May 2024 02:09:44 +0800 Subject: [PATCH 15/25] File loader support Windows API --- extension/data_loader/file_data_loader.cpp | 95 ++++++++++++++++++++++ extension/data_loader/file_data_loader.h | 15 +++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index f1a3d335c0d..47ed9d55fd9 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -12,10 +12,14 @@ #include #include +#ifdef _WIN32 +#include +#else #include #include #include #include +#endif #include #include @@ -54,9 +58,15 @@ FileDataLoader::~FileDataLoader() { // file_name_ can be nullptr if this instance was moved from, but freeing a // null pointer is safe. std::free(const_cast(file_name_)); +#ifdef _WIN32 + if (fd_ != INVALID_HANDLE_VALUE) { + CloseHandle(fd_); + } +#else // fd_ can be -1 if this instance was moved from, but closing a negative fd is // safe (though it will return an error). 
::close(fd_); +#endif } Result FileDataLoader::from( @@ -68,6 +78,34 @@ Result FileDataLoader::from( "Alignment %zu is not a power of 2", alignment); +#ifdef _WIN32 + HANDLE fd = CreateFile( + file_name, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (fd == INVALID_HANDLE_VALUE) { + ET_LOG( + Error, "Failed to open %s: %lu", file_name, GetLastError()); + return Error::AccessFailed; + } + + LARGE_INTEGER file_size_li; + if (!GetFileSizeEx(fd, &file_size_li)) { + ET_LOG( + Error, + "Could not get length of %s: %lu", + file_name, + GetLastError()); + CloseHandle(fd); + return Error::AccessFailed; + } + size_t file_size = static_cast(file_size_li.QuadPart); + +#else // Use open() instead of fopen() to avoid the layer of buffering that // fopen() does. We will be reading large portions of the file in one shot, // so buffering does not help. @@ -92,12 +130,17 @@ Result FileDataLoader::from( return Error::AccessFailed; } size_t file_size = st.st_size; +#endif // Copy the filename so we can print better debug messages if reads fail. const char* file_name_copy = ::strdup(file_name); if (file_name_copy == nullptr) { ET_LOG(Error, "strdup(%s) failed", file_name); +#ifdef _WIN32 + CloseHandle(fd); +#else ::close(fd); +#endif return Error::MemoryAllocationFailed; } @@ -121,7 +164,11 @@ void FreeSegment(void* context, void* data, __ET_UNUSED size_t size) { Result FileDataLoader::Load(size_t offset, size_t size) { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. +#ifdef _WIN32 + fd_ != INVALID_HANDLE_VALUE, +#else fd_ >= 0, +#endif InvalidState, "Uninitialized"); ET_CHECK_OR_RETURN_ERROR( @@ -138,6 +185,20 @@ Result FileDataLoader::Load(size_t offset, size_t size) { return FreeableBuffer(nullptr, 0, /*free_fn=*/nullptr); } +#ifdef _WIN32 + // Seek to the right place in the file. 
+ LARGE_INTEGER li; + li.QuadPart = offset; + if (!SetFilePointerEx(fd_, li, NULL, FILE_BEGIN)) { + ET_LOG( + Error, + "Seeking %s to offset %zu failed: %lu", + file_name_, + offset, + GetLastError()); + return Error::AccessFailed; + } +#else // Seek to the right place in the file. off_t seek_offset = ::lseek(fd_, offset, SEEK_SET); if (seek_offset != offset) { @@ -150,6 +211,7 @@ Result FileDataLoader::Load(size_t offset, size_t size) { strerror(errno)); return Error::AccessFailed; } +#endif // Allocate memory for the FreeableBuffer. size_t alloc_size = size; @@ -185,6 +247,34 @@ Result FileDataLoader::Load(size_t offset, size_t size) { // Read the data into the aligned address. size_t needed = size; uint8_t* buf = reinterpret_cast(aligned_buffer); +#ifdef _WIN32 + while (needed > 0) { + DWORD nread = 0; + if (!ReadFile(fd_, buf, static_cast(needed), &nread, NULL)) { + ET_LOG( + Error, + "Reading from %s: failed to read %zu bytes at offset %zu: %lu", + file_name_, + size, + offset, + GetLastError()); + std::free(buffer); + return Error::AccessFailed; + } + if (nread == 0) { + ET_LOG( + Error, + "Reading from %s: failed to read %zu bytes at offset %zu: EOF", + file_name_, + size, + offset); + std::free(buffer); + return Error::AccessFailed; + } + needed -= nread; + buf += nread; + } +#else while (needed > 0) { ssize_t nread = ::read(fd_, buf, needed); if (nread < 0 && errno == EINTR) { @@ -208,6 +298,7 @@ Result FileDataLoader::Load(size_t offset, size_t size) { needed -= nread; buf += nread; } +#endif // We can't naively free this pointer, since it may not be what malloc() gave // us. Pass the offset to the real buffer as context. This is the number of @@ -228,7 +319,11 @@ Result FileDataLoader::Load(size_t offset, size_t size) { Result FileDataLoader::size() const { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. 
+#ifdef _WIN32 + fd_ != INVALID_HANDLE_VALUE, +#else fd_ >= 0, +#endif InvalidState, "Uninitialized"); return file_size_; diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index ade3b515abf..1e6a00043d9 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -8,6 +8,15 @@ #pragma once +#ifdef _WIN32 +#include +#define FD_TYPE HANDLE +#define INVALID_FD INVALID_HANDLE_VALUE +#else +#define FD_TYPE int +#define INVALID_FD -1 +#endif + #include #include @@ -60,7 +69,7 @@ class FileDataLoader : public DataLoader { rhs.file_name_ = nullptr; rhs.file_size_ = 0; rhs.alignment_ = 0; - rhs.fd_ = -1; + rhs.fd_ = INVALID_FD; } ~FileDataLoader() override; @@ -72,7 +81,7 @@ class FileDataLoader : public DataLoader { private: FileDataLoader( - int fd, + FD_TYPE fd, size_t file_size, size_t alignment, const char* file_name) @@ -89,7 +98,7 @@ class FileDataLoader : public DataLoader { const char* file_name_; // Owned by the instance. size_t file_size_; size_t alignment_; - int fd_; // Owned by the instance. + FD_TYPE fd_; // Owned by the instance. 
}; } // namespace util From f0a8b034f2e859c353748dbbd0f9c94d552765ee Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 1 Jun 2024 04:18:57 +0800 Subject: [PATCH 16/25] Make cmake `LIB_xxx` point to correct lib path --- build/executorch-config.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/build/executorch-config.cmake b/build/executorch-config.cmake index 982bc097a0d..a13861bc3c4 100644 --- a/build/executorch-config.cmake +++ b/build/executorch-config.cmake @@ -12,6 +12,10 @@ cmake_minimum_required(VERSION 3.19) +if(CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(CMAKE_FIND_LIBRARY_SUFFIXES ".dll;.a") +endif() + set(_root "${CMAKE_CURRENT_LIST_DIR}/../..") set(required_lib_list executorch executorch_no_prim_ops portable_kernels) foreach(lib ${required_lib_list}) @@ -77,8 +81,7 @@ foreach(lib ${lib_list}) # keep all libs as static when CMAKE_TOOLCHAIN_IOS is used add_library(${lib} STATIC IMPORTED) endif() - if ("${${lib_var}}" MATCHES ".dll.a$") - string(REGEX REPLACE ".dll.a$" ".dll" ${lib_var} "${${lib_var}}") + if ("${${lib_var}}" MATCHES ".dll$") set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION "${${lib_var}}" IMPORTED_IMPLIB "${${lib_var}}.a") else() set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION "${${lib_var}}") From 49add79d4d66073fa88a8ad997498bd56d5d7c9b Mon Sep 17 00:00:00 2001 From: Hans Date: Sun, 2 Jun 2024 01:34:34 +0800 Subject: [PATCH 17/25] Move shared memory management to platform implement --- runtime/backend/interface.cpp | 86 ++++--------------- runtime/kernel/operator_registry.cpp | 81 ++++-------------- runtime/platform/platform.h | 21 +++++ runtime/platform/target/Minimal.cpp | 8 ++ runtime/platform/target/Posix.cpp | 32 +++++++ runtime/platform/target/Windows.cpp | 119 +++++++++++++++++++-------- 6 files changed, 176 insertions(+), 171 deletions(-) diff --git a/runtime/backend/interface.cpp b/runtime/backend/interface.cpp index 3ec3af6307f..2e84d8e4210 100644 --- a/runtime/backend/interface.cpp 
+++ b/runtime/backend/interface.cpp @@ -8,87 +8,33 @@ #include #include +#include -#ifdef _WIN32 -#include -#include -#include -#define getpid GetCurrentProcessId -#else -#include -#endif +namespace torch { +namespace executor { + +PyTorchBackendInterface::~PyTorchBackendInterface() {} // Task t128866626: Remove global static variables. // We want to be able to run multiple Executor instances // and having a global registration isn't a viable solution // in the long term. -#ifdef _WIN32 - -#define SHARED_MEMORY_NAME "torch_executor_backend_registry" -static std::shared_ptr backend_reg; - +torch::executor::BackendRegistry& getBackendRegistry(); torch::executor::BackendRegistry& getBackendRegistry() { - if (backend_reg != nullptr) { - return *backend_reg; - } - - HANDLE hMapFile = OpenFileMapping( - FILE_MAP_ALL_ACCESS, // read/write access - FALSE, // do not inherit the name - _T(SHARED_MEMORY_NAME) // name of mapping object + // Operator registration happens in static initialization time when PAL init + // may or may not happen already. Here we are assuming et_pal_init() doesn't + // have any side effect even if falled multiple times. 
+ ::et_pal_init(); + + static torch::executor::BackendRegistry* backend_reg = static_cast( + ::et_pal_get_shared_memory( + "torch_executor_backend_registry", + sizeof(torch::executor::BackendRegistry) + ) ); - - if (hMapFile == NULL) { - // Create a new file mapping object - hMapFile = CreateFileMapping( - INVALID_HANDLE_VALUE, // use paging file - NULL, // default security - PAGE_READWRITE, // read/write access - 0, // maximum object size (high-order DWORD) - sizeof(torch::executor::BackendRegistry), // maximum object size (low-order DWORD) - _T(SHARED_MEMORY_NAME) // name of mapping object - ); - if (hMapFile == NULL) { - return *backend_reg; - } - } - - torch::executor::BackendRegistry* registry = (torch::executor::BackendRegistry*) MapViewOfFile( - hMapFile, // handle to map object - FILE_MAP_ALL_ACCESS, // read/write permission - 0, - 0, - sizeof(torch::executor::BackendRegistry) - ); - - if (registry == NULL) { - return *backend_reg; - } - - if (backend_reg == nullptr) { - backend_reg = std::shared_ptr(registry, [](torch::executor::BackendRegistry* ptr) { - UnmapViewOfFile(ptr); - }); - } - return *backend_reg; } -#else - -torch::executor::BackendRegistry& getBackendRegistry(); -torch::executor::BackendRegistry& getBackendRegistry() { - static torch::executor::BackendRegistry backend_reg; - return backend_reg; -} - -#endif - -namespace torch { -namespace executor { - -PyTorchBackendInterface::~PyTorchBackendInterface() {} - PyTorchBackendInterface* get_backend_class(const char* name) { return getBackendRegistry().get_backend_class(name); } diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index 02ee9b60ff7..e7bd216fde1 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -8,80 +8,32 @@ #include +#include #include #include #include #include -#ifdef _WIN32 -#include -#include -#include - -#define SHARED_MEMORY_NAME "torch_executor_operator_registry" -static std::shared_ptr 
operator_reg; +namespace torch { +namespace executor { +torch::executor::OperatorRegistry& getOperatorRegistry(); torch::executor::OperatorRegistry& getOperatorRegistry() { - if (operator_reg != nullptr) { - return *operator_reg; - } - - HANDLE hMapFile = OpenFileMapping( - FILE_MAP_ALL_ACCESS, // read/write access - FALSE, // do not inherit the name - _T(SHARED_MEMORY_NAME) // name of mapping object - ); - - if (hMapFile == NULL) { - // Create a new file mapping object - hMapFile = CreateFileMapping( - INVALID_HANDLE_VALUE, // use paging file - NULL, // default security - PAGE_READWRITE, // read/write access - 0, // maximum object size (high-order DWORD) - sizeof(torch::executor::OperatorRegistry), // maximum object size (low-order DWORD) - _T(SHARED_MEMORY_NAME) // name of mapping object - ); - if (hMapFile == NULL) { - return *operator_reg; - } - } + // Operator registration happens in static initialization time when PAL init + // may or may not happen already. Here we are assuming et_pal_init() doesn't + // have any side effect even if falled multiple times. 
+ ::et_pal_init(); - torch::executor::OperatorRegistry* registry = (torch::executor::OperatorRegistry*) MapViewOfFile( - hMapFile, // handle to map object - FILE_MAP_ALL_ACCESS, // read/write permission - 0, - 0, - sizeof(torch::executor::OperatorRegistry) + static torch::executor::OperatorRegistry* operator_registry = static_cast( + ::et_pal_get_shared_memory( + "torch_executor_operator_registry", + sizeof(torch::executor::OperatorRegistry) + ) ); - - if (registry == NULL) { - return *operator_reg; - } - - if (operator_reg == nullptr) { - operator_reg = std::shared_ptr(registry, [](torch::executor::OperatorRegistry* ptr) { - UnmapViewOfFile(ptr); - }); - } - - return *operator_reg; -} - -#else - -torch::executor::OperatorRegistry& getOperatorRegistry(); -torch::executor::OperatorRegistry& getOperatorRegistry() { - static torch::executor::OperatorRegistry operator_registry; - return operator_registry; + return *operator_registry; } -#endif - -namespace torch { -namespace executor { - Error register_kernels(const ArrayRef& kernels) { Error success = getOperatorRegistry().register_kernels(kernels); if (success == Error::InvalidArgument || success == Error::Internal) { @@ -95,11 +47,6 @@ Error register_kernels(const ArrayRef& kernels) { } Error OperatorRegistry::register_kernels(const ArrayRef& kernels) { - // Operator registration happens in static initialization time when PAL init - // may or may not happen already. Here we are assuming et_pal_init() doesn't - // have any side effect even if falled multiple times. - ::et_pal_init(); - if (kernels.size() + this->num_kernels_ > kMaxNumOfKernels) { ET_LOG( Error, diff --git a/runtime/platform/platform.h b/runtime/platform/platform.h index 1f1b3b4c173..a166166e2f0 100644 --- a/runtime/platform/platform.h +++ b/runtime/platform/platform.h @@ -115,4 +115,25 @@ void et_pal_emit_log_message( const char* message, size_t length) ET_INTERNAL_PLATFORM_WEAKNESS; +/** + * Get a shared memory region by name. 
+ * If the shared memory region does not exist, it will be created. + * + * @param[in] name Name of the shared memory region. + * @param[in] size Size of the shared memory region in bytes. + * + * @retval A pointer to the shared memory region on success. nullptr on failure. + */ +void* et_pal_get_shared_memory( + const char* name, + size_t size) ET_INTERNAL_PLATFORM_WEAKNESS; + +/** + * Free a shared memory region by name. + * + * @param[in] name Name of the shared memory region. + */ +void et_pal_free_shared_memory( + const char* name) ET_INTERNAL_PLATFORM_WEAKNESS; + } // extern "C" diff --git a/runtime/platform/target/Minimal.cpp b/runtime/platform/target/Minimal.cpp index ed6cac7392c..65949e9c535 100644 --- a/runtime/platform/target/Minimal.cpp +++ b/runtime/platform/target/Minimal.cpp @@ -47,3 +47,11 @@ void et_pal_emit_log_message( __ET_UNUSED size_t line, __ET_UNUSED const char* message, __ET_UNUSED size_t length) {} + +void* et_pal_get_shared_memory( + __ET_UNUSED const char* name, + __ET_UNUSED size_t size) { + return nullptr; +} + +void et_pal_free_shared_memory(__ET_UNUSED void* ptr) {} diff --git a/runtime/platform/target/Posix.cpp b/runtime/platform/target/Posix.cpp index f434043d838..79ea3b29c73 100644 --- a/runtime/platform/target/Posix.cpp +++ b/runtime/platform/target/Posix.cpp @@ -27,6 +27,9 @@ #include #include #include +#include +#include +#include #include @@ -69,6 +72,9 @@ static std::chrono::time_point systemStartTime; /// Flag set to true if the PAL has been successfully initialized. static bool initialized = false; +// Shared memory +static std::unordered_map> sharedMemoryMap; + /** * Initialize the platform abstraction layer. 
* @@ -170,3 +176,29 @@ void et_pal_emit_log_message( message); fflush(ET_LOG_OUTPUT_FILE); } + +void* et_pal_get_shared_memory( + const char* name, + size_t size) { + _ASSERT_PAL_INITIALIZED(); + + auto it = sharedMemoryMap.find(name); + if (it != sharedMemoryMap.end()) { + return it->second.get(); + } + + auto sharedMemory = std::shared_ptr(malloc(size), free); + if (sharedMemory == nullptr) { + return nullptr; + } + + sharedMemoryMap[name] = sharedMemory; + return sharedMemory.get(); +} + +void et_pal_free_shared_memory( + const char* name) { + _ASSERT_PAL_INITIALIZED(); + + sharedMemoryMap.erase(name); +} diff --git a/runtime/platform/target/Windows.cpp b/runtime/platform/target/Windows.cpp index 8eb66604082..8b568da9fa1 100644 --- a/runtime/platform/target/Windows.cpp +++ b/runtime/platform/target/Windows.cpp @@ -30,11 +30,12 @@ #include #include #include +#include +#include +#include #include -#define SHARED_MEMORY_NAME "torch_executor_platform_init_time" - // The FILE* to write logs to. #define ET_LOG_OUTPUT_FILE stderr @@ -69,48 +70,30 @@ #endif // NDEBUG /// Start time of the system (used to zero the system timestamp). 
-static std::shared_ptr> systemStartTime = nullptr; +static std::chrono::time_point* systemStartTime = nullptr; + +// Shared memory +typedef std::pair, HANDLE> SharedMemory; +static std::unordered_map sharedMemoryMap; +void* et_pal_get_shared_memory_internal( + const char* name, + size_t size); bool check_shared_memory() { if (systemStartTime != nullptr) { return true; } - HANDLE hMapFile = OpenFileMapping( - FILE_MAP_ALL_ACCESS, // read/write access - FALSE, // do not inherit the name - _T(SHARED_MEMORY_NAME) // name of mapping object - ); - - if (hMapFile == NULL) { - // Create a new file mapping object - hMapFile = CreateFileMapping( - INVALID_HANDLE_VALUE, // use paging file - NULL, // default security - PAGE_READWRITE, // read/write access - 0, // maximum object size (high-order DWORD) - sizeof(std::chrono::time_point), // maximum object size (low-order DWORD) - _T(SHARED_MEMORY_NAME) // name of mapping object - ); - if (hMapFile == NULL) { - return false; - } - } - - systemStartTime =std::shared_ptr>( - (std::chrono::time_point*) MapViewOfFile( - hMapFile, // handle to map object - FILE_MAP_ALL_ACCESS, // read/write permission - 0, - 0, - sizeof(std::chrono::time_point) - ) + void *sharedMemory = et_pal_get_shared_memory_internal( + "torch_executor_platform_init_time", + sizeof(std::chrono::time_point) ); - - if (systemStartTime == nullptr) { + if (sharedMemory == nullptr) { return false; } + systemStartTime = static_cast*>(sharedMemory); + *systemStartTime = std::chrono::steady_clock::now(); return true; } @@ -213,3 +196,71 @@ void et_pal_emit_log_message( message); fflush(ET_LOG_OUTPUT_FILE); } + +void* et_pal_get_shared_memory_internal( + const char* name, + size_t size) { + + auto it = sharedMemoryMap.find(name); + if (it != sharedMemoryMap.end()) { + return it->second.first.get(); + } + + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, + FALSE, + name); + if (hMapFile == NULL) { + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, + NULL, + 
PAGE_READWRITE, + 0, + size, + name); + if (hMapFile == NULL) { + return nullptr; + } + } + + void* sharedMemory = MapViewOfFile( + hMapFile, + FILE_MAP_ALL_ACCESS, + 0, + 0, + size); + if (sharedMemory == NULL) { + CloseHandle(hMapFile); + return nullptr; + } + + sharedMemoryMap[name] = { + std::shared_ptr( + sharedMemory, + [hMapFile](void* ptr) { + UnmapViewOfFile(ptr); + CloseHandle(hMapFile); + } + ), + hMapFile + }; + return sharedMemoryMap[name].first.get(); +} + +void* et_pal_get_shared_memory( + const char* name, + size_t size) { + _ASSERT_PAL_INITIALIZED(); + return et_pal_get_shared_memory_internal(name, size); +} + +void et_pal_free_shared_memory( + const char* name) { + _ASSERT_PAL_INITIALIZED(); + auto it = sharedMemoryMap.find(name); + if (it == sharedMemoryMap.end()) { + return; + } + + sharedMemoryMap.erase(it); +} From ba90d0b1f6b63a42b381503c6d36e6820b9167e7 Mon Sep 17 00:00:00 2001 From: Hans Date: Mon, 3 Jun 2024 02:01:27 +0800 Subject: [PATCH 18/25] Revert "Move shared memory management to platform implement" This reverts commit 49add79d4d66073fa88a8ad997498bd56d5d7c9b. --- runtime/backend/interface.cpp | 86 +++++++++++++++---- runtime/kernel/operator_registry.cpp | 81 ++++++++++++++---- runtime/platform/platform.h | 21 ----- runtime/platform/target/Minimal.cpp | 8 -- runtime/platform/target/Posix.cpp | 32 ------- runtime/platform/target/Windows.cpp | 119 ++++++++------------------- 6 files changed, 171 insertions(+), 176 deletions(-) diff --git a/runtime/backend/interface.cpp b/runtime/backend/interface.cpp index 2e84d8e4210..3ec3af6307f 100644 --- a/runtime/backend/interface.cpp +++ b/runtime/backend/interface.cpp @@ -8,33 +8,87 @@ #include #include -#include -namespace torch { -namespace executor { - -PyTorchBackendInterface::~PyTorchBackendInterface() {} +#ifdef _WIN32 +#include +#include +#include +#define getpid GetCurrentProcessId +#else +#include +#endif // Task t128866626: Remove global static variables. 
// We want to be able to run multiple Executor instances // and having a global registration isn't a viable solution // in the long term. -torch::executor::BackendRegistry& getBackendRegistry(); +#ifdef _WIN32 + +#define SHARED_MEMORY_NAME "torch_executor_backend_registry" +static std::shared_ptr backend_reg; + torch::executor::BackendRegistry& getBackendRegistry() { - // Operator registration happens in static initialization time when PAL init - // may or may not happen already. Here we are assuming et_pal_init() doesn't - // have any side effect even if falled multiple times. - ::et_pal_init(); - - static torch::executor::BackendRegistry* backend_reg = static_cast( - ::et_pal_get_shared_memory( - "torch_executor_backend_registry", - sizeof(torch::executor::BackendRegistry) - ) + if (backend_reg != nullptr) { + return *backend_reg; + } + + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object ); + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(torch::executor::BackendRegistry), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return *backend_reg; + } + } + + torch::executor::BackendRegistry* registry = (torch::executor::BackendRegistry*) MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + sizeof(torch::executor::BackendRegistry) + ); + + if (registry == NULL) { + return *backend_reg; + } + + if (backend_reg == nullptr) { + backend_reg = std::shared_ptr(registry, [](torch::executor::BackendRegistry* ptr) { + UnmapViewOfFile(ptr); + }); + } + return *backend_reg; } +#else + +torch::executor::BackendRegistry& 
getBackendRegistry(); +torch::executor::BackendRegistry& getBackendRegistry() { + static torch::executor::BackendRegistry backend_reg; + return backend_reg; +} + +#endif + +namespace torch { +namespace executor { + +PyTorchBackendInterface::~PyTorchBackendInterface() {} + PyTorchBackendInterface* get_backend_class(const char* name) { return getBackendRegistry().get_backend_class(name); } diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index e7bd216fde1..02ee9b60ff7 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -8,32 +8,80 @@ #include -#include #include #include #include #include -namespace torch { -namespace executor { +#ifdef _WIN32 +#include +#include +#include + +#define SHARED_MEMORY_NAME "torch_executor_operator_registry" +static std::shared_ptr operator_reg; -torch::executor::OperatorRegistry& getOperatorRegistry(); torch::executor::OperatorRegistry& getOperatorRegistry() { - // Operator registration happens in static initialization time when PAL init - // may or may not happen already. Here we are assuming et_pal_init() doesn't - // have any side effect even if falled multiple times. 
- ::et_pal_init(); + if (operator_reg != nullptr) { + return *operator_reg; + } - static torch::executor::OperatorRegistry* operator_registry = static_cast( - ::et_pal_get_shared_memory( - "torch_executor_operator_registry", - sizeof(torch::executor::OperatorRegistry) - ) + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object ); - return *operator_registry; + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(torch::executor::OperatorRegistry), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return *operator_reg; + } + } + + torch::executor::OperatorRegistry* registry = (torch::executor::OperatorRegistry*) MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + sizeof(torch::executor::OperatorRegistry) + ); + + if (registry == NULL) { + return *operator_reg; + } + + if (operator_reg == nullptr) { + operator_reg = std::shared_ptr(registry, [](torch::executor::OperatorRegistry* ptr) { + UnmapViewOfFile(ptr); + }); + } + + return *operator_reg; +} + +#else + +torch::executor::OperatorRegistry& getOperatorRegistry(); +torch::executor::OperatorRegistry& getOperatorRegistry() { + static torch::executor::OperatorRegistry operator_registry; + return operator_registry; } +#endif + +namespace torch { +namespace executor { + Error register_kernels(const ArrayRef& kernels) { Error success = getOperatorRegistry().register_kernels(kernels); if (success == Error::InvalidArgument || success == Error::Internal) { @@ -47,6 +95,11 @@ Error register_kernels(const ArrayRef& kernels) { } Error OperatorRegistry::register_kernels(const 
ArrayRef& kernels) { + // Operator registration happens in static initialization time when PAL init + // may or may not happen already. Here we are assuming et_pal_init() doesn't + // have any side effect even if falled multiple times. + ::et_pal_init(); + if (kernels.size() + this->num_kernels_ > kMaxNumOfKernels) { ET_LOG( Error, diff --git a/runtime/platform/platform.h b/runtime/platform/platform.h index a166166e2f0..1f1b3b4c173 100644 --- a/runtime/platform/platform.h +++ b/runtime/platform/platform.h @@ -115,25 +115,4 @@ void et_pal_emit_log_message( const char* message, size_t length) ET_INTERNAL_PLATFORM_WEAKNESS; -/** - * Get a shared memory region by name. - * If the shared memory region does not exist, it will be created. - * - * @param[in] name Name of the shared memory region. - * @param[in] size Size of the shared memory region in bytes. - * - * @retval A pointer to the shared memory region on success. nullptr on failure. - */ -void* et_pal_get_shared_memory( - const char* name, - size_t size) ET_INTERNAL_PLATFORM_WEAKNESS; - -/** - * Free a shared memory region by name. - * - * @param[in] name Name of the shared memory region. 
- */ -void et_pal_free_shared_memory( - const char* name) ET_INTERNAL_PLATFORM_WEAKNESS; - } // extern "C" diff --git a/runtime/platform/target/Minimal.cpp b/runtime/platform/target/Minimal.cpp index 65949e9c535..ed6cac7392c 100644 --- a/runtime/platform/target/Minimal.cpp +++ b/runtime/platform/target/Minimal.cpp @@ -47,11 +47,3 @@ void et_pal_emit_log_message( __ET_UNUSED size_t line, __ET_UNUSED const char* message, __ET_UNUSED size_t length) {} - -void* et_pal_get_shared_memory( - __ET_UNUSED const char* name, - __ET_UNUSED size_t size) { - return nullptr; -} - -void et_pal_free_shared_memory(__ET_UNUSED void* ptr) {} diff --git a/runtime/platform/target/Posix.cpp b/runtime/platform/target/Posix.cpp index 79ea3b29c73..f434043d838 100644 --- a/runtime/platform/target/Posix.cpp +++ b/runtime/platform/target/Posix.cpp @@ -27,9 +27,6 @@ #include #include #include -#include -#include -#include #include @@ -72,9 +69,6 @@ static std::chrono::time_point systemStartTime; /// Flag set to true if the PAL has been successfully initialized. static bool initialized = false; -// Shared memory -static std::unordered_map> sharedMemoryMap; - /** * Initialize the platform abstraction layer. 
* @@ -176,29 +170,3 @@ void et_pal_emit_log_message( message); fflush(ET_LOG_OUTPUT_FILE); } - -void* et_pal_get_shared_memory( - const char* name, - size_t size) { - _ASSERT_PAL_INITIALIZED(); - - auto it = sharedMemoryMap.find(name); - if (it != sharedMemoryMap.end()) { - return it->second.get(); - } - - auto sharedMemory = std::shared_ptr(malloc(size), free); - if (sharedMemory == nullptr) { - return nullptr; - } - - sharedMemoryMap[name] = sharedMemory; - return sharedMemory.get(); -} - -void et_pal_free_shared_memory( - const char* name) { - _ASSERT_PAL_INITIALIZED(); - - sharedMemoryMap.erase(name); -} diff --git a/runtime/platform/target/Windows.cpp b/runtime/platform/target/Windows.cpp index 8b568da9fa1..8eb66604082 100644 --- a/runtime/platform/target/Windows.cpp +++ b/runtime/platform/target/Windows.cpp @@ -30,12 +30,11 @@ #include #include #include -#include -#include -#include #include +#define SHARED_MEMORY_NAME "torch_executor_platform_init_time" + // The FILE* to write logs to. #define ET_LOG_OUTPUT_FILE stderr @@ -70,30 +69,48 @@ #endif // NDEBUG /// Start time of the system (used to zero the system timestamp). 
-static std::chrono::time_point* systemStartTime = nullptr; - -// Shared memory -typedef std::pair, HANDLE> SharedMemory; -static std::unordered_map sharedMemoryMap; +static std::shared_ptr> systemStartTime = nullptr; -void* et_pal_get_shared_memory_internal( - const char* name, - size_t size); bool check_shared_memory() { if (systemStartTime != nullptr) { return true; } - void *sharedMemory = et_pal_get_shared_memory_internal( - "torch_executor_platform_init_time", - sizeof(std::chrono::time_point) + HANDLE hMapFile = OpenFileMapping( + FILE_MAP_ALL_ACCESS, // read/write access + FALSE, // do not inherit the name + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + + if (hMapFile == NULL) { + // Create a new file mapping object + hMapFile = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_READWRITE, // read/write access + 0, // maximum object size (high-order DWORD) + sizeof(std::chrono::time_point), // maximum object size (low-order DWORD) + _T(SHARED_MEMORY_NAME) // name of mapping object + ); + if (hMapFile == NULL) { + return false; + } + } + + systemStartTime =std::shared_ptr>( + (std::chrono::time_point*) MapViewOfFile( + hMapFile, // handle to map object + FILE_MAP_ALL_ACCESS, // read/write permission + 0, + 0, + sizeof(std::chrono::time_point) + ) ); - if (sharedMemory == nullptr) { + + if (systemStartTime == nullptr) { return false; } - systemStartTime = static_cast*>(sharedMemory); - *systemStartTime = std::chrono::steady_clock::now(); return true; } @@ -196,71 +213,3 @@ void et_pal_emit_log_message( message); fflush(ET_LOG_OUTPUT_FILE); } - -void* et_pal_get_shared_memory_internal( - const char* name, - size_t size) { - - auto it = sharedMemoryMap.find(name); - if (it != sharedMemoryMap.end()) { - return it->second.first.get(); - } - - HANDLE hMapFile = OpenFileMapping( - FILE_MAP_ALL_ACCESS, - FALSE, - name); - if (hMapFile == NULL) { - hMapFile = CreateFileMapping( - INVALID_HANDLE_VALUE, - NULL, - 
PAGE_READWRITE, - 0, - size, - name); - if (hMapFile == NULL) { - return nullptr; - } - } - - void* sharedMemory = MapViewOfFile( - hMapFile, - FILE_MAP_ALL_ACCESS, - 0, - 0, - size); - if (sharedMemory == NULL) { - CloseHandle(hMapFile); - return nullptr; - } - - sharedMemoryMap[name] = { - std::shared_ptr( - sharedMemory, - [hMapFile](void* ptr) { - UnmapViewOfFile(ptr); - CloseHandle(hMapFile); - } - ), - hMapFile - }; - return sharedMemoryMap[name].first.get(); -} - -void* et_pal_get_shared_memory( - const char* name, - size_t size) { - _ASSERT_PAL_INITIALIZED(); - return et_pal_get_shared_memory_internal(name, size); -} - -void et_pal_free_shared_memory( - const char* name) { - _ASSERT_PAL_INITIALIZED(); - auto it = sharedMemoryMap.find(name); - if (it == sharedMemoryMap.end()) { - return; - } - - sharedMemoryMap.erase(it); -} From 06a44297ca30a6d1be648c9bdbfd3ae0fca80462 Mon Sep 17 00:00:00 2001 From: Hans Date: Mon, 3 Jun 2024 02:00:35 +0800 Subject: [PATCH 19/25] Fix time release --- runtime/platform/target/Windows.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/runtime/platform/target/Windows.cpp b/runtime/platform/target/Windows.cpp index 8eb66604082..b6d387525a2 100644 --- a/runtime/platform/target/Windows.cpp +++ b/runtime/platform/target/Windows.cpp @@ -104,7 +104,10 @@ bool check_shared_memory() { 0, 0, sizeof(std::chrono::time_point) - ) + ), + [](std::chrono::time_point* ptr) { + UnmapViewOfFile(ptr); + } ); if (systemStartTime == nullptr) { From b013a53c335652c4ebfad56e4857d4f3dc2b314e Mon Sep 17 00:00:00 2001 From: Hans Date: Mon, 23 Sep 2024 22:55:52 +0800 Subject: [PATCH 20/25] Fix build --- backends/qualcomm/CMakeLists.txt | 8 ++-- build/Utils.cmake | 9 +++- build/extract_sources.py | 13 ++++++ extension/data_loader/file_data_loader.cpp | 48 +++++++++++----------- extension/data_loader/file_data_loader.h | 4 +- runtime/platform/targets.bzl | 2 +- 6 files changed, 55 insertions(+), 29 deletions(-) diff --git 
a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 1ab77f1b880..e2a94ed9685 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -183,9 +183,11 @@ target_link_libraries( qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_no_prim_ops qcir_utils extension_tensor ) -set_target_properties( - qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" -) +if (NOT CMAKE_SYSTEM_NAME STREQUAL "Windows") + set_target_properties( + qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'" + ) +endif() target_link_libraries(utils PRIVATE qnn_executorch_logging) target_link_libraries( shared_buffer PRIVATE qnn_executorch_logging ${CMAKE_DL_LIBS} diff --git a/build/Utils.cmake b/build/Utils.cmake index 3cd8bab3d06..4cf5370ee64 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -195,13 +195,20 @@ function(extract_sources sources_file) else() message(FATAL_ERROR "Unsupported ANDROID_ABI setting ${ANDROID_ABI}. 
Please add it here!") endif() + elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") + set(fake_host_arg "--fake-host=windows") + if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + set(fake_arch_arg "--fake-arch=x8664") + elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") + set(fake_arch_arg "--fake-arch=aarch64") + endif() endif() execute_process( COMMAND ${PYTHON_EXECUTABLE} ${executorch_root}/build/extract_sources.py --config=${executorch_root}/build/cmake_deps.toml --out=${sources_file} - --buck2=${BUCK2} ${target_platforms_arg} + --buck2=${BUCK2} ${target_platforms_arg} ${fake_host_arg} ${fake_arch_arg} OUTPUT_VARIABLE gen_srcs_output ERROR_VARIABLE gen_srcs_error RESULT_VARIABLE gen_srcs_exit_code diff --git a/build/extract_sources.py b/build/extract_sources.py index 5004fe0c508..1ab62f5621d 100755 --- a/build/extract_sources.py +++ b/build/extract_sources.py @@ -183,6 +183,12 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--target-platforms", help="--target-platforms to pass to buck cquery, if any." ) + parser.add_argument( + "--fake-host", help="Fake host to pass to buck cquery, if any." + ) + parser.add_argument( + "--fake-arch", help="Fake architecture to pass to buck cquery, if any." 
+ ) return parser.parse_args() @@ -213,6 +219,13 @@ def main(): if args.target_platforms: buck_args = ["--target-platforms"] buck_args.append(args.target_platforms) + if args.fake_host: + buck_args.append("--fake-host") + buck_args.append(args.fake_host) + if args.fake_arch: + buck_args.append("--fake-arch") + buck_args.append(args.fake_arch) + print(f"buck_args: {buck_args}") for name, target in graph.by_name.items(): target_to_srcs[name] = sorted(target.get_sources(graph, runner, buck_args)) diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 36ef549b86e..29f2b0dbde5 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -182,11 +182,7 @@ Result FileDataLoader::load( ET_UNUSED const DataLoader::SegmentInfo& segment_info) const { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. -#ifdef _WIN32 - fd_ != INVALID_HANDLE_VALUE, -#else - fd_ >= 0, -#endif + IS_VALID_FD(fd_), InvalidState, "Uninitialized"); ET_CHECK_OR_RETURN_ERROR( @@ -260,11 +256,7 @@ Result FileDataLoader::load( Result FileDataLoader::size() const { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. -#ifdef _WIN32 - fd_ != INVALID_HANDLE_VALUE, -#else - fd_ >= 0, -#endif + IS_VALID_FD(fd_), InvalidState, "Uninitialized"); return file_size_; @@ -277,7 +269,7 @@ ET_NODISCARD Error FileDataLoader::load_into( void* buffer) const { ET_CHECK_OR_RETURN_ERROR( // Probably had its value moved to another instance. - fd_ >= 0, + IS_VALID_FD(fd_), InvalidState, "Uninitialized"); ET_CHECK_OR_RETURN_ERROR( @@ -295,32 +287,32 @@ ET_NODISCARD Error FileDataLoader::load_into( size_t needed = size; uint8_t* buf = reinterpret_cast(buffer); - // Make a duplicate fd if pread() is not available and we have to seek(). 
- // Cannot use the standard dup() or fcntl() calls because the returned - // duplicate will share the underlying file record and affect the original fd - // when seeking on multiple threads simultaneously. - const auto dup_fd = ET_HAVE_PREAD ? fd_ : ::open(file_name_, O_RDONLY); - #ifdef _WIN32 + while (needed > 0) { const auto chunk_size = std::min( needed, static_cast(std::numeric_limits::max())); LARGE_INTEGER move; move.QuadPart = static_cast(offset); - if (!SetFilePointerEx(file_handle, move, nullptr, FILE_BEGIN)) { - std::cerr << "Failed to set file pointer: " << GetLastError() << std::endl; + if (!SetFilePointerEx(fd_, move, nullptr, FILE_BEGIN)) { + ET_LOG( + Error, + "Reading from %s: failed to set file pointer: %lx", + file_name_, + GetLastError()); return Error::AccessFailed; } DWORD nread = 0; - if (!ReadFile(file_handle, buf, static_cast(chunk_size), &nread, nullptr)) { + if (!ReadFile(fd_, buf, static_cast(chunk_size), &nread, nullptr)) { DWORD error_code = GetLastError(); if (error_code == ERROR_IO_PENDING) { continue; } ET_LOG( Error, - "Reading from %s: failed to read %zu bytes at offset %zu: %#x", - file_name, + "Reading from %s: failed to read %zu bytes at offset %lu: %lx", + file_name_, + chunk_size, offset, error_code); return Error::AccessFailed; @@ -330,7 +322,7 @@ ET_NODISCARD Error FileDataLoader::load_into( ET_LOG( Error, "Reading from %s: EOF encountered unexpectedly at offset %zu", - file_name, + file_name_, offset); return Error::AccessFailed; } @@ -339,7 +331,15 @@ ET_NODISCARD Error FileDataLoader::load_into( buf += nread; offset += nread; } + #else + + // Make a duplicate fd if pread() is not available and we have to seek(). + // Cannot use the standard dup() or fcntl() calls because the returned + // duplicate will share the underlying file record and affect the original fd + // when seeking on multiple threads simultaneously. + const auto dup_fd = ET_HAVE_PREAD ? 
fd_ : ::open(file_name_, O_RDONLY); + while (needed > 0) { // Reads on macOS will fail with EINVAL if size > INT32_MAX. const auto chunk_size = std::min( @@ -378,7 +378,9 @@ ET_NODISCARD Error FileDataLoader::load_into( if (!ET_HAVE_PREAD) { ::close(dup_fd); } + #endif + return Error::Ok; } diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index 27babd79edc..c18e055eca2 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -12,9 +12,11 @@ #include #define FD_TYPE HANDLE #define INVALID_FD INVALID_HANDLE_VALUE +#define IS_VALID_FD(fd) (fd != INVALID_HANDLE_VALUE) #else #define FD_TYPE int #define INVALID_FD -1 +#define IS_VALID_FD(fd) (fd >= 0) #endif #include @@ -68,7 +70,7 @@ class FileDataLoader final : public executorch::runtime::DataLoader { const_cast(rhs.file_name_) = nullptr; const_cast(rhs.file_size_) = 0; const_cast(rhs.alignment_) = 0; - const_cast(rhs.fd_) = INVALID_FD; + const_cast(rhs.fd_) = INVALID_FD; } ~FileDataLoader() override; diff --git a/runtime/platform/targets.bzl b/runtime/platform/targets.bzl index db4ac57c04f..56ce4b3aeff 100644 --- a/runtime/platform/targets.bzl +++ b/runtime/platform/targets.bzl @@ -49,7 +49,7 @@ def define_common_targets(): srcs = _select_pal({ "minimal": ["default/minimal.cpp"], "posix": ["default/posix.cpp"], - "windows": ["target/Windows.cpp"], + "windows": ["default/Windows.cpp"], }), deps = [ ":pal_interface", From 5b87e5006d89e487f5cd5450e822ec52d59a526a Mon Sep 17 00:00:00 2001 From: Hans Date: Tue, 24 Sep 2024 01:53:10 +0800 Subject: [PATCH 21/25] Fix shared segment for MinGW --- runtime/backend/interface.cpp | 9 ++------- runtime/kernel/operator_registry.cpp | 13 ++++--------- runtime/platform/compiler.h | 12 ++++++++++++ runtime/platform/default/Windows.cpp | 11 ++++------- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/runtime/backend/interface.cpp b/runtime/backend/interface.cpp index 
1266ad173a6..dd54ea82693 100644 --- a/runtime/backend/interface.cpp +++ b/runtime/backend/interface.cpp @@ -19,24 +19,19 @@ namespace { // The max number of backends that can be registered globally. constexpr size_t kMaxRegisteredBackends = 16; -#ifdef _WIN32 -#pragma data_seg(".shared") -#endif +#pragma data_seg(".SS_DLLMAIN") // TODO(T128866626): Remove global static variables. We want to be able to run // multiple Executor instances and having a global registration isn't a viable // solution in the long term. /// Global table of registered backends. -Backend registered_backends[kMaxRegisteredBackends]; +Backend registered_backends[kMaxRegisteredBackends] ET_SHARED; /// The number of backends registered in the table. size_t num_registered_backends = 0; -#ifdef _WIN32 #pragma data_seg() -#pragma comment(linker, "/SECTION:.shared,RWS") -#endif } // namespace diff --git a/runtime/kernel/operator_registry.cpp b/runtime/kernel/operator_registry.cpp index 586a8ef9970..9ad3f48be2d 100644 --- a/runtime/kernel/operator_registry.cpp +++ b/runtime/kernel/operator_registry.cpp @@ -29,9 +29,7 @@ constexpr uint32_t kMaxKernelsPerOp = 8; constexpr uint32_t kMaxRegisteredKernels = kMaxOperators * kMaxKernelsPerOp; #endif -#ifdef _WIN32 -#pragma data_seg(".shared") -#endif +#pragma data_seg(".SS_DLLMAIN") // Data that backs the kernel table. Since Kernel has a custom default // constructor (implicitly, because it contains KernelKey, which has a custom @@ -41,18 +39,15 @@ constexpr uint32_t kMaxRegisteredKernels = kMaxOperators * kMaxKernelsPerOp; // and point the table at it. // @lint-ignore CLANGTIDY facebook-hte-CArray alignas(sizeof(Kernel)) uint8_t - registered_kernels_data[kMaxRegisteredKernels * sizeof(Kernel)]; + registered_kernels_data[kMaxRegisteredKernels * sizeof(Kernel)] ET_SHARED; /// Global table of registered kernels. 
-Kernel* registered_kernels = reinterpret_cast(registered_kernels_data); +Kernel* registered_kernels ET_SHARED = reinterpret_cast(registered_kernels_data); /// The number of kernels registered in the table. -size_t num_registered_kernels = 0; +size_t num_registered_kernels ET_SHARED = 0; -#ifdef _WIN32 #pragma data_seg() -#pragma comment(linker, "/SECTION:.shared,RWS") -#endif // Registers the kernels, but may return an error. Error register_kernels_internal(const Span kernels) { diff --git a/runtime/platform/compiler.h b/runtime/platform/compiler.h index b6f7fc8642f..2120e585b69 100644 --- a/runtime/platform/compiler.h +++ b/runtime/platform/compiler.h @@ -162,6 +162,18 @@ using ssize_t = ptrdiff_t; #endif +// Shared variable section +#ifdef _WIN32 +#ifdef __MINGW32__ +#define ET_SHARED __attribute__((section(".shr"), shared)) +#else +#define ET_SHARED +#endif +#else +#define ET_SHARED +#endif + + // DEPRECATED: Use the non-underscore-prefixed versions instead. // TODO(T199005537): Remove these once all users have stopped using them. #define __ET_DEPRECATED ET_DEPRECATED diff --git a/runtime/platform/default/Windows.cpp b/runtime/platform/default/Windows.cpp index 81f020aea51..eb140b19cb6 100644 --- a/runtime/platform/default/Windows.cpp +++ b/runtime/platform/default/Windows.cpp @@ -33,8 +33,6 @@ #include -#define SHARED_MEMORY_NAME "torch_executor_platform_init_time" - // The FILE* to write logs to. #define ET_LOG_OUTPUT_FILE stderr @@ -68,16 +66,15 @@ #endif // NDEBUG -#pragma data_seg(".shared") // Start of shared data segment +#pragma data_seg(".SS_DLLMAIN") /// Start time of the system (used to zero the system timestamp). -static std::chrono::time_point systemStartTime; +static std::chrono::time_point systemStartTime ET_SHARED; /// Flag set to true if the PAL has been successfully initialized. 
-static bool initialized = false; +static bool initialized ET_SHARED = false; -#pragma data_seg() // End of shared data segment -#pragma comment(linker, "/SECTION:.shared,RWS") // Make the shared data segment read-write-shared +#pragma data_seg() /** * Initialize the platform abstraction layer. From 25d77516938dff31f519d915513b7196f924a1d5 Mon Sep 17 00:00:00 2001 From: Hans Date: Tue, 24 Sep 2024 02:03:40 +0800 Subject: [PATCH 22/25] Merge `runtime/platform/default/Windows.cpp` into `runtime/platform/default/posix.cpp` --- runtime/platform/default/Windows.cpp | 179 --------------------------- runtime/platform/default/posix.cpp | 8 +- runtime/platform/targets.bzl | 8 +- 3 files changed, 7 insertions(+), 188 deletions(-) delete mode 100644 runtime/platform/default/Windows.cpp diff --git a/runtime/platform/default/Windows.cpp b/runtime/platform/default/Windows.cpp deleted file mode 100644 index eb140b19cb6..00000000000 --- a/runtime/platform/default/Windows.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -/** - * @file - * Fallback PAL implementations for POSIX-compatible systems. - * - * Note that this assumes that the platform defines the symbols used in this - * file (like fprintf()), because this file will still be built even if the - * functions are later overridden. When building for a platform that does not - * provide the necessary symbols, clients can use Minimal.cpp instead, but they - * will need to override all of the functions. - */ - -// This cpp file will provide weak implementations of the symbols declared in -// Platform.h. Client users can strongly define any or all of the functions to -// override them. 
-#define ET_INTERNAL_PLATFORM_WEAKNESS __ET_WEAK -#include - -#include -#include - -#include -#include -#include -#include - -#include - -// The FILE* to write logs to. -#define ET_LOG_OUTPUT_FILE stderr - -/** - * On debug builds, ensure that `et_pal_init` has been called before - * other PAL functions which depend on initialization. - */ -#ifdef NDEBUG - -/** - * Assert that the PAL has been initialized. - */ -#define _ASSERT_PAL_INITIALIZED() ((void)0) - -#else // NDEBUG - -/** - * Assert that the PAL has been initialized. - */ -#define _ASSERT_PAL_INITIALIZED() \ - ({ \ - if (!initialized) { \ - fprintf( \ - ET_LOG_OUTPUT_FILE, \ - "ExecuTorch PAL must be initialized before call to %s()", \ - __ET_FUNCTION); \ - fflush(ET_LOG_OUTPUT_FILE); \ - et_pal_abort(); \ - } \ - }) - -#endif // NDEBUG - -#pragma data_seg(".SS_DLLMAIN") - -/// Start time of the system (used to zero the system timestamp). -static std::chrono::time_point systemStartTime ET_SHARED; - -/// Flag set to true if the PAL has been successfully initialized. -static bool initialized ET_SHARED = false; - -#pragma data_seg() - -/** - * Initialize the platform abstraction layer. - * - * This function should be called before any other function provided by the PAL - * to initialize any global state. Typically overridden by PAL implementer. - */ -void et_pal_init(void) { - if (initialized) { - return; - } - - systemStartTime = std::chrono::steady_clock::now(); - initialized = true; -} - -/** - * Immediately abort execution, setting the device into an error state, if - * available. - */ -__ET_NORETURN void et_pal_abort(void) { - std::abort(); -} - -/** - * Return a monotonically non-decreasing timestamp in system ticks. - * - * @retval Timestamp value in system ticks. 
- */ -et_timestamp_t et_pal_current_ticks(void) { - _ASSERT_PAL_INITIALIZED(); - auto systemCurrentTime = std::chrono::steady_clock::now(); - return std::chrono::duration_cast( - systemCurrentTime - systemStartTime) - .count(); -} - -/** - * Return the conversion rate from system ticks to nanoseconds, as a fraction. - * To convert an interval from system ticks to nanoseconds, multiply the tick - * count by the numerator and then divide by the denominator: - * nanoseconds = ticks * numerator / denominator - * - * @retval The ratio of nanoseconds to system ticks. - */ -et_tick_ratio_t et_pal_ticks_to_ns_multiplier(void) { - // The system tick interval is 1 nanosecond, so the conversion factor is 1. - return {1, 1}; -} - -/** - * Emit a log message via platform output (serial port, console, etc). - * - * @param[in] timestamp Timestamp of the log event in system ticks since boot. - * @param[in] level Severity level of the message. Must be a printable 7-bit - * ASCII uppercase letter. - * @param[in] filename Name of the file that created the log event. - * @param[in] function Name of the function that created the log event. - * @param[in] line Line in the source file where the log event was created. - * @param[in] message Message string to log. - * @param[in] length Message string length. - */ -void et_pal_emit_log_message( - et_timestamp_t timestamp, - et_pal_log_level_t level, - const char* filename, - __ET_UNUSED const char* function, - size_t line, - const char* message, - __ET_UNUSED size_t length) { - _ASSERT_PAL_INITIALIZED(); - - // Not all platforms have ticks == nanoseconds, but this one does. 
- timestamp /= 1000; // To microseconds - unsigned long int us = timestamp % 1000000; - timestamp /= 1000000; // To seconds - unsigned int sec = timestamp % 60; - timestamp /= 60; // To minutes - unsigned int min = timestamp % 60; - timestamp /= 60; // To hours - unsigned int hour = timestamp; - - // Use a format similar to glog and folly::logging, except: - // - Print time since et_pal_init since we don't have wall time - // - Don't include the thread ID, to avoid adding a threading dependency - // - Add the string "executorch:" to make the logs more searchable - // - // Clients who want to change the format or add other fields can override this - // weak implementation of et_pal_emit_log_message. - fprintf( - ET_LOG_OUTPUT_FILE, - "%c %02u:%02u:%02u.%06lu executorch:%s:%zu] %s\n", - level, - hour, - min, - sec, - us, - filename, - line, - message); - fflush(ET_LOG_OUTPUT_FILE); -} diff --git a/runtime/platform/default/posix.cpp b/runtime/platform/default/posix.cpp index aba504f53e0..f808b717ab7 100644 --- a/runtime/platform/default/posix.cpp +++ b/runtime/platform/default/posix.cpp @@ -63,11 +63,15 @@ #endif // NDEBUG +#pragma data_seg(".SS_DLLMAIN") // Shared data segment for DLL main with MSVC + /// Start time of the system (used to zero the system timestamp). -static std::chrono::time_point systemStartTime; +static std::chrono::time_point systemStartTime ET_SHARED; /// Flag set to true if the PAL has been successfully initialized. -static bool initialized = false; +static bool initialized ET_SHARED = false; + +#pragma data_seg() /** * Initialize the platform abstraction layer. diff --git a/runtime/platform/targets.bzl b/runtime/platform/targets.bzl index 56ce4b3aeff..6d3141a4219 100644 --- a/runtime/platform/targets.bzl +++ b/runtime/platform/targets.bzl @@ -6,12 +6,7 @@ def _select_pal(dict_): `executorch.pal_default` build config value. Fails if no corresponding entry exists. 
""" - # buck2 check target platform - # check config//os: - if host_info().os.is_windows: - pal_default = native.read_config("executorch", "pal_default", "windows") - else: - pal_default = native.read_config("executorch", "pal_default", "posix") + pal_default = native.read_config("executorch", "pal_default", "posix") if not pal_default in dict_: fail("Missing key for executorch.pal_default value '{}' in dict '{}'".format(pal_default, dict_)) return dict_[pal_default] @@ -49,7 +44,6 @@ def define_common_targets(): srcs = _select_pal({ "minimal": ["default/minimal.cpp"], "posix": ["default/posix.cpp"], - "windows": ["default/Windows.cpp"], }), deps = [ ":pal_interface", From 304734d944884cc5b7fee2634a23090d4dd9a00b Mon Sep 17 00:00:00 2001 From: Hans Date: Fri, 27 Sep 2024 15:50:48 +0800 Subject: [PATCH 23/25] Support cross-compile --- build/Utils.cmake | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/build/Utils.cmake b/build/Utils.cmake index 4cf5370ee64..9228789e658 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -195,12 +195,17 @@ function(extract_sources sources_file) else() message(FATAL_ERROR "Unsupported ANDROID_ABI setting ${ANDROID_ABI}. 
Please add it here!") endif() - elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") - set(fake_host_arg "--fake-host=windows") - if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(fake_arch_arg "--fake-arch=x8664") - elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") - set(fake_arch_arg "--fake-arch=aarch64") + else() + if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "${CMAKE_HOST_SYSTEM_NAME}") + string(TOLOWER "${CMAKE_SYSTEM_NAME}" lowercase_system_name) + set(fake_host_arg "--fake-host=${lowercase_system_name}") + endif() + if (NOT "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "${CMAKE_HOST_SYSTEM_PROCESSOR}") + if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + set(fake_arch_arg "--fake-arch=x8664") + else() + set(fake_arch_arg "--fake-arch=aarch64") + endif() endif() endif() From 93695c4f4e42dbec14e7608ddb7ee3b7342afe89 Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 28 Sep 2024 21:14:10 +0800 Subject: [PATCH 24/25] Support cross-compile for macOS --- build/Utils.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/build/Utils.cmake b/build/Utils.cmake index 9228789e658..d3e458e381e 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -207,6 +207,11 @@ function(extract_sources sources_file) set(fake_arch_arg "--fake-arch=aarch64") endif() endif() + if ("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64") + set(fake_arch_arg "--fake-arch=aarch64") + elseif("${CMAKE_OSX_ARCHITECTURES}" STREQUAL "x86_64") + set(fake_arch_arg "--fake-arch=x8664") + endif() endif() execute_process( From 2cbb25266d60a3f388062ec1841a59eaabcea0af Mon Sep 17 00:00:00 2001 From: Hans Date: Sat, 28 Sep 2024 21:18:12 +0800 Subject: [PATCH 25/25] Correct arg value when cross compile macOS --- build/Utils.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/build/Utils.cmake b/build/Utils.cmake index d3e458e381e..620ba27114e 100644 --- a/build/Utils.cmake +++ b/build/Utils.cmake @@ -197,8 +197,12 @@ 
function(extract_sources sources_file) endif() else() if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "${CMAKE_HOST_SYSTEM_NAME}") - string(TOLOWER "${CMAKE_SYSTEM_NAME}" lowercase_system_name) - set(fake_host_arg "--fake-host=${lowercase_system_name}") + if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") + set(fake_host_arg "--fake-host=macos") + else() + string(TOLOWER "${CMAKE_SYSTEM_NAME}" lowercase_system_name) + set(fake_host_arg "--fake-host=${lowercase_system_name}") + endif() endif() if (NOT "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "${CMAKE_HOST_SYSTEM_PROCESSOR}") if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")