diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp
index 97c15944b29ad..a12c498b8ad68 100644
--- a/libdevice/crt_wrapper.cpp
+++ b/libdevice/crt_wrapper.cpp
@@ -165,6 +165,14 @@ void _wassert(const wchar_t *wexpr, const wchar_t *wfile, unsigned line) {
 }
 #else
 DEVICE_EXTERN_C
+void *malloc(size_t size) {
+  // Stub: nothing is allocated; every call yields the same fixed address.
+  // NOTE(review): looks like a poison value never meant to be dereferenced.
+  return reinterpret_cast<void *>(0xEFEFEFEFEFEFEFEF);
+}
+DEVICE_EXTERN_C
+void free(void *ptr) {} // No-op: the stub malloc above allocates nothing.
+DEVICE_EXTERN_C
 void __assert_fail(const char *expr, const char *file, unsigned int line,
                    const char *func) {
   __devicelib_assert_fail(
diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h
index c0b3f7779c159..1cf092ad3c70b 100644
--- a/llvm/include/llvm/Support/PropertySetIO.h
+++ b/llvm/include/llvm/Support/PropertySetIO.h
@@ -217,6 +217,7 @@ class PropertySetRegistry {
   static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata";
   static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties";
   static constexpr char SYCL_ASSERT_USED[] = "SYCL/assert used";
+  static constexpr char SYCL_MALLOC_USED[] = "SYCL/malloc used";
   static constexpr char SYCL_KERNEL_NAMES[] = "SYCL/kernel names";
   static constexpr char SYCL_EXPORTED_SYMBOLS[] = "SYCL/exported symbols";
   static constexpr char SYCL_IMPORTED_SYMBOLS[] = "SYCL/imported symbols";
diff --git a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp
index c4bd7129fefe4..c3fb7edcd0dfe 100644
--- a/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp
+++ b/llvm/lib/SYCLPostLink/ComputeModuleRuntimeInfo.cpp
@@ -456,6 +456,13 @@ PropSetRegTy computeModuleProperties(const Module &M,
     for (const StringRef &FName : FuncNames)
       PropSet.add(PropSetRegTy::SYCL_ASSERT_USED, FName, true);
   }
+  {
+    std::vector<StringRef> MallocFuncNames{"malloc", "free"};
+    std::vector<StringRef> FuncNames =
+        getKernelNamesUsingSpecialFunctions(M, MallocFuncNames);
+    for (const StringRef &FName : FuncNames)
+      PropSet.add(PropSetRegTy::SYCL_MALLOC_USED, FName, true);
+  }
   {
     std::vector> ArgPos =
         getKernelNamesUsingImplicitLocalMem(M);
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index 0aa48c6992525..6d83080e7f822 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -646,6 +646,8 @@ __glibcxx_assert_fail(const char *file, int line, const char *func,
                       const char *cond) noexcept;
 } // namespace std
 extern "C" {
+extern __DPCPP_SYCL_EXTERNAL_LIBC void *malloc(size_t size);
+extern __DPCPP_SYCL_EXTERNAL_LIBC void free(void *ptr);
 extern __DPCPP_SYCL_EXTERNAL_LIBC void __assert_fail(const char *expr,
                                                      const char *file,
                                                      unsigned int line,
diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp
index 2fc0b56135b20..64ddf15ed0934 100644
--- a/sycl/source/detail/compiler.hpp
+++ b/sycl/source/detail/compiler.hpp
@@ -57,6 +57,8 @@
 #define __SYCL_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties"
 /// PropertySetRegistry::SYCL_ASSERT_USED defined in PropertySetIO.h
 #define __SYCL_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used"
+/// PropertySetRegistry::SYCL_MALLOC_USED defined in PropertySetIO.h
+#define __SYCL_PROPERTY_SET_SYCL_MALLOC_USED "SYCL/malloc used"
 /// PropertySetRegistry::SYCL_KERNEL_NAMES defined in PropertySetIO.h
 #define __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES "SYCL/kernel names"
 /// PropertySetRegistry::SYCL_EXPORTED_SYMBOLS defined in PropertySetIO.h
diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp
index c8ff57631bb60..bd04c9ad3d156 100644
--- a/sycl/source/detail/device_binary_image.cpp
+++ b/sycl/source/detail/device_binary_image.cpp
@@ -192,6 +192,7 @@ RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) {
   DeviceLibMetadata.init(Bin, __SYCL_PROPERTY_SET_DEVICELIB_METADATA);
   KernelParamOptInfo.init(Bin, __SYCL_PROPERTY_SET_KERNEL_PARAM_OPT_INFO);
   AssertUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_ASSERT_USED);
+  MallocUsed.init(Bin, __SYCL_PROPERTY_SET_SYCL_MALLOC_USED);
   ImplicitLocalArg.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPLICIT_LOCAL_ARG);
   ProgramMetadata.init(Bin, __SYCL_PROPERTY_SET_PROGRAM_METADATA);
   // Convert ProgramMetadata into the UR format
diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp
index 075229effb3ec..074c3dfcd927a 100644
--- a/sycl/source/detail/device_binary_image.hpp
+++ b/sycl/source/detail/device_binary_image.hpp
@@ -224,6 +224,7 @@ class RTDeviceBinaryImage {
     return KernelParamOptInfo;
   }
   const PropertyRange &getAssertUsed() const { return AssertUsed; }
+  const PropertyRange &getMallocUsed() const { return MallocUsed; }
   const PropertyRange &getProgramMetadata() const { return ProgramMetadata; }
   const std::vector &getProgramMetadataUR() const {
     return ProgramMetadataUR;
@@ -260,6 +261,7 @@ class RTDeviceBinaryImage {
   RTDeviceBinaryImage::PropertyRange DeviceLibMetadata;
   RTDeviceBinaryImage::PropertyRange KernelParamOptInfo;
   RTDeviceBinaryImage::PropertyRange AssertUsed;
+  RTDeviceBinaryImage::PropertyRange MallocUsed;
   RTDeviceBinaryImage::PropertyRange ProgramMetadata;
   RTDeviceBinaryImage::PropertyRange KernelNames;
   RTDeviceBinaryImage::PropertyRange ExportedSymbols;
diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp
index 526f160c6596b..3aae700fb3277 100644
--- a/sycl/source/detail/device_kernel_info.cpp
+++ b/sycl/source/detail/device_kernel_info.cpp
@@ -25,6 +25,7 @@ DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info)
 void DeviceKernelInfo::init(KernelNameStrRefT KernelName) {
   auto &PM = detail::ProgramManager::getInstance();
   MUsesAssert = PM.kernelUsesAssert(KernelName);
+  MUsesMalloc = PM.kernelUsesMalloc(KernelName);
   MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName);
 #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
   MInitialized.store(true);
@@ -78,10 +79,17 @@ FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
   assertInitialized();
   return MFastKernelSubcache;
 }
+
 bool DeviceKernelInfo::usesAssert() {
   assertInitialized();
   return MUsesAssert;
 }
+
+bool DeviceKernelInfo::usesMalloc() {
+  assertInitialized();
+  return MUsesMalloc;
+}
+
 const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() {
   assertInitialized();
   return MImplicitLocalArgPos;
diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp
index 0ea4ff2d051e6..d7cf1c0f2643b 100644
--- a/sycl/source/detail/device_kernel_info.hpp
+++ b/sycl/source/detail/device_kernel_info.hpp
@@ -109,6 +109,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
 
   FastKernelSubcacheT &getKernelSubcache();
   bool usesAssert();
+  bool usesMalloc();
   const std::optional &getImplicitLocalArgPos();
 
 private:
@@ -121,6 +122,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
   FastKernelSubcacheT MFastKernelSubcache;
   bool MUsesAssert;
   std::optional MImplicitLocalArgPos;
+  bool MUsesMalloc;
 };
 
 } // namespace detail
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 2ca4420dc0549..1b96129b0e945 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1802,6 +1802,14 @@ void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) {
       m_KernelUsesAssert.insert(Prop->Name);
 }
 
+void ProgramManager::cacheKernelUsesMallocInfo(const RTDeviceBinaryImage &Img) {
+  const RTDeviceBinaryImage::PropertyRange &MallocUsedRange =
+      Img.getMallocUsed();
+  if (MallocUsedRange.isAvailable())
+    for (const auto &Prop : MallocUsedRange)
+      m_KernelUsesMalloc.insert(Prop->Name);
+}
+
 void ProgramManager::cacheKernelImplicitLocalArg(
     const RTDeviceBinaryImage &Img) {
   const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange =
@@ -2045,6 +2053,7 @@ void ProgramManager::addImage(sycl_device_binary RawImg,
   }
 
   cacheKernelUsesAssertInfo(*Img);
+  cacheKernelUsesMallocInfo(*Img);
 
   // check if kernel uses sanitizer
   {
diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp
index b9d0dc700f77c..a76f94346b516 100644
--- a/sycl/source/detail/program_manager/program_manager.hpp
+++ b/sycl/source/detail/program_manager/program_manager.hpp
@@ -370,6 +370,11 @@ class ProgramManager {
     return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end();
   }
 
+  template <typename NameT>
+  bool kernelUsesMalloc(const NameT &KernelName) const {
+    return m_KernelUsesMalloc.find(KernelName) != m_KernelUsesMalloc.end();
+  }
+
   SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }
 
   std::optional
@@ -409,6 +414,9 @@ class ProgramManager {
   /// Add info on kernels using assert into cache
   void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img);
 
+  /// Add info on kernels using malloc/free into cache
+  void cacheKernelUsesMallocInfo(const RTDeviceBinaryImage &Img);
+
   /// Add info on kernels using local arg into cache
   void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img);
 
@@ -522,8 +530,9 @@ class ProgramManager {
   // different types without temporary key_type object creation. This includes
   // standard overloads, such as comparison between std::string and
   // std::string_view or just char*.
-  using KernelUsesAssertSet = std::set>;
-  KernelUsesAssertSet m_KernelUsesAssert;
+  using KernelUsesFnSet = std::set>;
+  KernelUsesFnSet m_KernelUsesAssert;
+  KernelUsesFnSet m_KernelUsesMalloc;
   std::unordered_map m_KernelImplicitLocalArgPos;
 
   // Map for storing device kernel information. Runtime lookup should be avoided