diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 7dfa19cd41851..ffa071f209580 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -157,7 +157,7 @@ void single_task(queue Q, const KernelType &KernelObj, if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { detail::submit_kernel_direct_single_task( - std::move(Q), KernelObj, empty_properties_t{}, CodeLoc); + std::move(Q), KernelObj, {}, empty_properties_t{}, CodeLoc); } else { submit( std::move(Q), @@ -312,7 +312,7 @@ void nd_launch(queue Q, launch_config, Properties> Config, LaunchConfigAccess(Config); detail::submit_kernel_direct_parallel_for( - std::move(Q), LaunchConfigAccess.getRange(), KernelObj, + std::move(Q), LaunchConfigAccess.getRange(), KernelObj, {}, LaunchConfigAccess.getProperties()); } else { submit(std::move(Q), [&](handler &CGH) { diff --git a/sycl/include/sycl/khr/free_function_commands.hpp b/sycl/include/sycl/khr/free_function_commands.hpp index f32e493008bfe..f155d4e6ad73c 100644 --- a/sycl/include/sycl/khr/free_function_commands.hpp +++ b/sycl/include/sycl/khr/free_function_commands.hpp @@ -317,7 +317,7 @@ void launch_task(const sycl::queue &q, KernelType &&k, if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { detail::submit_kernel_direct_single_task( - q, std::forward(k), + q, std::forward(k), {}, ext::oneapi::experimental::empty_properties_t{}, codeLoc); } else { submit(q, [&](handler &h) { launch_task(h, k); }, codeLoc); diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index f36f4da661554..71e2479fb8346 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -40,6 +40,7 @@ #include // for nd_range #include // for property_list #include // for range +#include // for sycl::span #include // for size_t #include // for function @@ -68,6 +69,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -76,6 +78,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -165,7 +168,7 @@ template auto submit_kernel_direct( const queue &Queue, const nd_range &Range, - KernelTypeUniversalRef &&KernelFunc, + KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents, const PropertiesT &ExtraProps = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -230,12 +233,14 @@ auto submit_kernel_direct( if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties, - TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); + Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents, + ParsedProperties, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties, - TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); + Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents, + ParsedProperties, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); } } @@ -244,7 +249,7 @@ template auto submit_kernel_direct_parallel_for( const queue &Queue, const nd_range &Range, - KernelTypeUniversalRef &&KernelFunc, + KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -266,8 +271,8 @@ auto submit_kernel_direct_parallel_for( return submit_kernel_direct( - Queue, Range, std::forward(KernelFunc), Props, - CodeLoc); + Queue, Range, std::forward(KernelFunc), DepEvents, + Props, CodeLoc); } template auto submit_kernel_direct_single_task( const queue &Queue, KernelTypeUniversalRef &&KernelFunc, + sycl::span DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -282,7 +288,8 @@ auto submit_kernel_direct_single_task( EventNeeded, PropertiesT, KernelTypeUniversalRef, 1>( Queue, nd_range<1>{1, 1}, - std::forward(KernelFunc), Props, CodeLoc); + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); } } // namespace detail @@ -2797,12 +2804,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + assert(false); + // TODO The handler-less path does not support kernel functions // with the kernel_handler type argument yet. if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { + return detail::submit_kernel_direct_single_task( - *this, KernelFunc, Properties, TlsCodeLocCapture.query()); + *this, KernelFunc, {}, Properties, TlsCodeLocCapture.query()); } else { return submit( [&](handler &CGH) { @@ -2852,13 +2862,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "Use queue.submit() instead"); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvent); - CGH.template single_task( - Properties, KernelFunc); - }, - TlsCodeLocCapture.query()); + + assert(false); + + // TODO The handler-less path does not support kernel functions + // with the kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { + + return detail::submit_kernel_direct_single_task( + *this, KernelFunc, sycl::span(&DepEvent, 1), Properties, + TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.template single_task( + Properties, KernelFunc); + }, + TlsCodeLocCapture.query()); + } } /// single_task version with a kernel represented as a lambda. @@ -2903,13 +2926,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { "Use queue.submit() instead"); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvents); - CGH.template single_task( - Properties, KernelFunc); - }, - TlsCodeLocCapture.query()); + + assert(false); + + // TODO The handler-less path does not support kernel functions + // with the kernel_handler type argument yet. + if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT::value)) { + + return detail::submit_kernel_direct_single_task( + *this, KernelFunc, DepEvents, Properties, TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.template single_task( + Properties, KernelFunc); + }, + TlsCodeLocCapture.query()); + } } /// single_task version with a kernel represented as a lambda. @@ -3341,6 +3376,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); using KernelType = std::tuple_element_t<0, std::tuple>; + assert(false); + // TODO The handler-less path does not support reductions, and // kernel functions with the kernel_handler type argument yet. if constexpr (sizeof...(RestT) == 1 && @@ -3348,7 +3385,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., Properties, TlsCodeLocCapture.query()); + *this, Range, Rest..., {}, Properties, TlsCodeLocCapture.query()); } else return submit( [&](handler &CGH) { @@ -3377,7 +3414,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { !(detail::KernelLambdaHasKernelHandlerArgT< KernelType, sycl::nd_item>::value)) { return detail::submit_kernel_direct_parallel_for( - *this, Range, Rest..., + *this, Range, Rest..., {}, ext::oneapi::experimental::empty_properties_t{}, TlsCodeLocCapture.query()); } else { @@ -3410,6 +3447,16 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { PropertiesT Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + using KernelType = std::tuple_element_t<0, std::tuple>; + + assert(false); + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + } return submit( [&](handler &CGH) { CGH.depends_on(DepEvent); @@ -3431,12 +3478,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { parallel_for(nd_range Range, event DepEvent, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvent); - CGH.template parallel_for(Range, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., sycl::span(&DepEvent, 1), + ext::oneapi::experimental::empty_properties_t{}, + TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.template parallel_for(Range, Rest...); + }, + TlsCodeLocCapture.query()); + } } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -3462,6 +3522,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { PropertiesT Properties, RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + using KernelType = std::tuple_element_t<0, std::tuple>; + + assert(false); + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + } + return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); @@ -3485,12 +3556,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { RestT &&...Rest) { constexpr detail::code_location CodeLoc = getCodeLocation(); detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit( - [&](handler &CGH) { - CGH.depends_on(DepEvents); - CGH.template parallel_for(Range, Rest...); - }, - TlsCodeLocCapture.query()); + using KernelType = std::tuple_element_t<0, std::tuple>; + + // TODO The handler-less path does not support reductions, and + // kernel functions with the kernel_handler type argument yet. + if constexpr (sizeof...(RestT) == 1 && + !(detail::KernelLambdaHasKernelHandlerArgT< + KernelType, sycl::nd_item>::value)) { + return detail::submit_kernel_direct_parallel_for( + *this, Range, Rest..., DepEvents, + ext::oneapi::experimental::empty_properties_t{}, + TlsCodeLocCapture.query()); + } else { + return submit( + [&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.template parallel_for(Range, Rest...); + }, + TlsCodeLocCapture.query()); + } } /// Copies data from a memory region pointed to by a placeholder accessor to diff --git a/sycl/source/detail/event_deps.hpp b/sycl/source/detail/event_deps.hpp new file mode 100644 index 0000000000000..fb30099dc9821 --- /dev/null +++ b/sycl/source/detail/event_deps.hpp @@ -0,0 +1,125 @@ +//==---------------- event_deps.hpp - SYCL event dependency utils ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +/// Adds an event dependency to the list of dependencies, performing +/// a series of checks. +/// +/// If the event is associated with a graph, and the queue is not, +/// the queue will be switched to a recording mode (transitive queue +/// recording feature). +/// +/// The LockQueue template argument defines whether the queue lock +/// should be acquired for the transition to a recording mode. It is +/// set to false in cases where the event dependencies are set directly +/// in the command submission flow and the lock is already acquired. +/// +/// \param EventImpl Event to register as a dependency +/// \param EventsRegistered A list of already registered events, where +/// the event will be added. +/// \param QueueImpl A queue associated with the event dependencies. Can +/// be nullptr if no associated queue. +/// \param ContextImpl A context associated with a queue or graph. +/// \param DeviceImpl A device associated with a queue or graph. +/// \param GraphImpl A graph associated with a queue or a handler. Can +/// be nullptr if no associated graph. +/// \param CommandGroupType Type of command group. +template +void registerEventDependency( + const EventImplPtr &EventImpl, std::vector &EventsRegistered, + queue_impl *QueueImpl, const context_impl &ContextImpl, + const device_impl &DeviceImpl, + const ext::oneapi::experimental::detail::graph_impl *GraphImpl, + CGType CommandGroupType) { + + if (!EventImpl) + return; + if (EventImpl->isDiscarded()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue operation cannot depend on discarded event."); + } + + // Async alloc calls adapter immediately. Any explicit/implicit dependencies + // are handled at that point, including in order queue deps. Further calls to + // depends_on after an async alloc are explicitly disallowed. + if (CommandGroupType == CGType::AsyncAlloc) { + throw sycl::exception(make_error_code(errc::invalid), + "Cannot submit a dependency after an asynchronous " + "allocation has already been executed!"); + } + + auto EventGraph = EventImpl->getCommandGraph(); + if (QueueImpl && EventGraph) { + auto QueueGraph = QueueImpl->getCommandGraph(); + + if (&EventGraph->getContextImpl() != &ContextImpl) { + throw sycl::exception( + make_error_code(errc::invalid), + "Cannot submit to a queue with a dependency from a graph that is " + "associated with a different context."); + } + + if (&EventGraph->getDeviceImpl() != &DeviceImpl) { + throw sycl::exception( + make_error_code(errc::invalid), + "Cannot submit to a queue with a dependency from a graph that is " + "associated with a different device."); + } + + if (QueueGraph && QueueGraph != EventGraph) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Cannot submit to a recording queue with a " + "dependency from a different graph."); + } + + // If the event dependency has a graph, that means that the queue that + // created it was in recording mode. If the current queue is not recording, + // we need to set it to recording (implements the transitive queue recording + // feature). + if (!QueueGraph) { + if constexpr (LockQueue) { + EventGraph->beginRecording(*QueueImpl); + } else { + EventGraph->beginRecordingUnlockedQueue(*QueueImpl); + } + } + } + + if (GraphImpl) { + if (EventGraph == nullptr) { + throw sycl::exception( + make_error_code(errc::invalid), + "Graph nodes cannot depend on events from outside the graph."); + } + if (EventGraph.get() != GraphImpl) { + throw sycl::exception( + make_error_code(errc::invalid), + "Graph nodes cannot depend on events from another graph."); + } + } + EventsRegistered.push_back(EventImpl); +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index ad5058361b6d4..9f878f0e8ea66 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -690,6 +690,14 @@ std::vector graph_impl::getExitNodesEvents( return Events; } +void graph_impl::beginRecordingUnlockedQueue(sycl::detail::queue_impl &Queue) { + graph_impl::WriteLock Lock(MMutex); + if (!Queue.hasCommandGraph()) { + Queue.setCommandGraphUnlocked(shared_from_this()); + addQueue(Queue); + } +} + void graph_impl::beginRecording(sycl::detail::queue_impl &Queue) { graph_impl::WriteLock Lock(MMutex); if (!Queue.hasCommandGraph()) { diff --git a/sycl/source/detail/graph/graph_impl.hpp b/sycl/source/detail/graph/graph_impl.hpp index 04f4bef37eadf..10cbbfab0282c 100644 --- a/sycl/source/detail/graph/graph_impl.hpp +++ b/sycl/source/detail/graph/graph_impl.hpp @@ -461,6 +461,15 @@ class graph_impl : public std::enable_shared_from_this { std::vector getExitNodesEvents(std::weak_ptr Queue); + /// Sets the Queue state to queue_state::recording. Adds the queue to the list + /// of recording queues associated with this graph. + /// Does not take the queue submission lock. + /// + /// Required for the cases, when the recording is started directly + /// from within the kernel submission flow. + /// @param[in] Queue The queue to be recorded from. + void beginRecordingUnlockedQueue(sycl::detail::queue_impl &Queue); + /// Sets the Queue state to queue_state::recording. Adds the queue to the list /// of recording queues associated with this graph. /// @param[in] Queue The queue to be recorded from. diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index fe9173d21b367..5b7bfb5e90fae 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include #include @@ -567,6 +568,7 @@ EventImplPtr queue_impl::submit_command_to_graph( EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { @@ -624,12 +626,13 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( *this, true); }; - return submit_direct(CallerNeedsEvent, SubmitKernelFunc); + return submit_direct(CallerNeedsEvent, DepEvents, SubmitKernelFunc); } template detail::EventImplPtr queue_impl::submit_direct(bool CallerNeedsEvent, + sycl::span DepEvents, SubmitCommandFuncType &SubmitCommandFunc) { detail::CG::StorageInitHelper CGData; std::unique_lock Lock(MMutex); @@ -640,7 +643,10 @@ queue_impl::submit_direct(bool CallerNeedsEvent, // Sync with an external event std::optional ExternalEvent = popExternalEvent(); if (ExternalEvent) { - CGData.MEvents.push_back(getSyclObjImpl(*ExternalEvent)); + registerEventDependency( + getSyclObjImpl(*ExternalEvent), CGData.MEvents, this, getContextImpl(), + getDeviceImpl(), hasCommandGraph() ? getCommandGraph().get() : nullptr, + detail::CGType::Kernel); } auto &Deps = hasCommandGraph() ? MExtGraphDeps : MDefaultGraphDeps; @@ -648,7 +654,17 @@ queue_impl::submit_direct(bool CallerNeedsEvent, // Sync with the last event for in order queue EventImplPtr &LastEvent = Deps.LastEventPtr; if (isInOrder() && LastEvent) { - CGData.MEvents.push_back(LastEvent); + registerEventDependency( + LastEvent, CGData.MEvents, this, getContextImpl(), getDeviceImpl(), + hasCommandGraph() ? getCommandGraph().get() : nullptr, + detail::CGType::Kernel); + } + + for (event e : DepEvents) { + registerEventDependency( + getSyclObjImpl(e), CGData.MEvents, this, getContextImpl(), + getDeviceImpl(), hasCommandGraph() ? getCommandGraph().get() : nullptr, + detail::CGType::Kernel); } // Barrier and un-enqueued commands synchronization for out or order queue diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d93e0b62a13c6..7c793b619ecab 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -364,11 +364,12 @@ class queue_impl : public std::enable_shared_from_this { event submit_kernel_direct_with_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - detail::EventImplPtr EventImpl = - submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - true, Props, CodeLoc, IsTopCodeLoc); + detail::EventImplPtr EventImpl = submit_kernel_direct_impl( + NDRDescT{Range}, HostKernel, DeviceKernelInfo, + /*CallerNeedsEvent*/ true, DepEvents, Props, CodeLoc, IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } @@ -376,10 +377,12 @@ class queue_impl : public std::enable_shared_from_this { void submit_kernel_direct_without_event( const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - false, Props, CodeLoc, IsTopCodeLoc); + /*CallerNeedsEvent*/ false, DepEvents, Props, + CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, @@ -598,9 +601,8 @@ class queue_impl : public std::enable_shared_from_this { const std::vector &DepEvents, bool CallerNeedsEvent); - void setCommandGraph( + void setCommandGraphUnlocked( std::shared_ptr Graph) { - std::lock_guard Lock(MMutex); MGraph = Graph; MExtGraphDeps.reset(); @@ -611,6 +613,12 @@ class queue_impl : public std::enable_shared_from_this { } } + void setCommandGraph( + std::shared_ptr Graph) { + std::lock_guard Lock(MMutex); + setCommandGraphUnlocked(Graph); + } + std::shared_ptr getCommandGraph() const { return MGraph.lock(); @@ -910,11 +918,13 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_kernel_direct_impl( const NDRDescT &NDRDesc, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, bool CallerNeedsEvent, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); template EventImplPtr submit_direct(bool CallerNeedsEvent, + sycl::span DepEvents, SubmitCommandFuncType &SubmitCommandFunc); /// Helper function for submitting a memory operation with a handler. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f9440c089f2a0..fbcd88f1bd42a 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -1680,69 +1681,10 @@ void handler::depends_on(const std::vector &Events) { } void handler::depends_on(const detail::EventImplPtr &EventImpl) { - if (!EventImpl) - return; - if (EventImpl->isDiscarded()) { - throw sycl::exception(make_error_code(errc::invalid), - "Queue operation cannot depend on discarded event."); - } - - // Async alloc calls adapter immediately. Any explicit/implicit dependencies - // are handled at that point, including in order queue deps. Further calls to - // depends_on after an async alloc are explicitly disallowed. - if (getType() == CGType::AsyncAlloc) { - throw sycl::exception(make_error_code(errc::invalid), - "Cannot submit a dependency after an asynchronous " - "allocation has already been executed!"); - } - - auto EventGraph = EventImpl->getCommandGraph(); - queue_impl *Queue = impl->get_queue_or_null(); - if (Queue && EventGraph) { - auto QueueGraph = Queue->getCommandGraph(); - - if (&EventGraph->getContextImpl() != &impl->get_context()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Cannot submit to a queue with a dependency from a graph that is " - "associated with a different context."); - } - - if (&EventGraph->getDeviceImpl() != &impl->get_device()) { - throw sycl::exception( - make_error_code(errc::invalid), - "Cannot submit to a queue with a dependency from a graph that is " - "associated with a different device."); - } - - if (QueueGraph && QueueGraph != EventGraph) { - throw sycl::exception(sycl::make_error_code(errc::invalid), - "Cannot submit to a recording queue with a " - "dependency from a different graph."); - } - - // If the event dependency has a graph, that means that the queue that - // created it was in recording mode. If the current queue is not recording, - // we need to set it to recording (implements the transitive queue recording - // feature). - if (!QueueGraph) { - EventGraph->beginRecording(*Queue); - } - } - - if (auto Graph = getCommandGraph(); Graph) { - if (EventGraph == nullptr) { - throw sycl::exception( - make_error_code(errc::invalid), - "Graph nodes cannot depend on events from outside the graph."); - } - if (EventGraph != Graph) { - throw sycl::exception( - make_error_code(errc::invalid), - "Graph nodes cannot depend on events from another graph."); - } - } - impl->CGData.MEvents.push_back(EventImpl); + registerEventDependency(EventImpl, impl->CGData.MEvents, + impl->get_queue_or_null(), impl->get_context(), + impl->get_device(), getCommandGraph().get(), + getType()); } void handler::depends_on(const std::vector &Events) { diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 7fe5649aecc2a..49f1724127c7a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -476,16 +476,19 @@ event submit_kernel_direct_with_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, + IsTopCodeLoc); } template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -493,6 +496,7 @@ template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -500,6 +504,7 @@ template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -508,16 +513,19 @@ void submit_kernel_direct_without_event_impl( const queue &Queue, const nd_range &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc); + Range, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, + IsTopCodeLoc); } template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( const queue &Queue, const nd_range<1> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -525,6 +533,7 @@ template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( const queue &Queue, const nd_range<2> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); @@ -532,6 +541,7 @@ template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( const queue &Queue, const nd_range<3> &Range, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, + sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); diff --git a/sycl/test/abi/layout_span.cpp b/sycl/test/abi/layout_span.cpp new file mode 100644 index 0000000000000..49fdce6d79f58 --- /dev/null +++ b/sycl/test/abi/layout_span.cpp @@ -0,0 +1,22 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// REQUIRES: linux +// UNSUPPORTED: libcxx + +// clang-format off + +#include + +void span(sycl::span) {} +// CHECK: 0 | class sycl::span +// CHECK-NEXT: 0 | pointer __data +// CHECK-NEXT: | [sizeof=8, dsize=8, align=8, +// CHECK-NEXT: | nvsize=8, nvalign=8] + +//---------------------------- + +void span_dynamic_extent(sycl::span) {} +// CHECK: 0 | class sycl::span +// CHECK-NEXT: 0 | pointer __data +// CHECK-NEXT: 8 | size_type __size +// CHECK-NEXT: | [sizeof=16, dsize=16, align=8, +// CHECK-NEXT: | nvsize=16, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 5e58ac99474d3..e42759431d374 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,12 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSH_5intel12experimental12cache_configENSJ_17use_root_sync_keyENSJ_23work_group_progress_keyENSJ_22sub_group_progress_keyENSJ_22work_item_progress_keyENSJ_4cuda12cluster_sizeILi1EEENST_ILi2EEENST_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f7e0f0f062223..1db98f1b5cf9d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,12 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z +??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z +??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z diff --git a/sycl/test/basic_tests/single_task_error_message.cpp b/sycl/test/basic_tests/single_task_error_message.cpp index b086c81e4f2af..7c6f020741247 100644 --- a/sycl/test/basic_tests/single_task_error_message.cpp +++ b/sycl/test/basic_tests/single_task_error_message.cpp @@ -31,6 +31,9 @@ int main() { [&](sycl::handler &cgh) { // expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': sycl::queue.single_task() requires a kernel instead of command group.{{.*}} Use queue.submit() instead}} // expected-error-re@sycl/detail/cg_types.hpp:* {{no matching function for call to object of type '(lambda at {{.*}}single_task_error_message.cpp:{{.*}})'}} + // TODO Investigate why this function template is not + // instantiated (if this is expected). + // expected-error@sycl/detail/cg_types.hpp:* {{no matching function for call to 'runKernelWithoutArg'}} }) .wait(); } @@ -47,6 +50,9 @@ int main() { [&](sycl::handler &cgh) { // expected-error-re@sycl/queue.hpp:* {{static assertion failed due to requirement '{{.*}}': sycl::queue.single_task() requires a kernel instead of command group.{{.*}} Use queue.submit() instead}} // expected-error-re@sycl/detail/cg_types.hpp:* {{no matching function for call to object of type '(lambda at {{.*}}single_task_error_message.cpp:{{.*}})'}} + // TODO Investigate why this function template is not + // instantiated (if this is expected). + // expected-error@sycl/detail/cg_types.hpp:* {{no matching function for call to 'runKernelWithoutArg'}} }) .wait(); } diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index ed1175ba9f57b..5174c8a29bb6b 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -151,5 +151,6 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: feature_test.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index b6c5ac3144887..76570a99bdda7 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -155,4 +155,5 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: sycl_span.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index 457978c1753c7..f2feef5bd9871 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -170,6 +170,7 @@ // CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp +// CHECK-NEXT: sycl_span.hpp // CHECK-NEXT: usm/usm_pointer_info.hpp // CHECK-NEXT: usm/usm_allocator.hpp // CHECK-EMPTY: