Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
186a811
Extend handler-less kernel submission path to API functions
slawekptak Oct 24, 2025
ffab4b5
Merge branch 'sycl' into no_handler_event_deps
slawekptak Oct 24, 2025
b769e49
Add an unlocked version of graph_impl::beginRecording
slawekptak Oct 27, 2025
4c82669
Use the const event type in sycl::span
slawekptak Oct 27, 2025
8c36583
Update Linux symbols, update tests
slawekptak Oct 28, 2025
256de78
Update Windows symbols
slawekptak Oct 28, 2025
0187376
Fix formatting
slawekptak Oct 28, 2025
7cd45e7
Fix comment
slawekptak Oct 29, 2025
497ad12
Avoid vector allocation for a single dependency
slawekptak Oct 31, 2025
50568ea
Add sycl::span layout test.
slawekptak Nov 4, 2025
a66128a
Add a new header with event dependency utilities.
slawekptak Nov 4, 2025
d0882d4
Limit the sycl::span layout test to host
slawekptak Nov 4, 2025
d1394be
Merge branch 'sycl' into no_handler_event_deps
slawekptak Nov 4, 2025
e780335
Update Linux symbols
slawekptak Nov 4, 2025
bece167
Update Windows symbols
slawekptak Nov 4, 2025
12aa7af
Enable properties parsing for kernel submission with event deps
slawekptak Nov 4, 2025
e35119c
Remove unused include and a new line
slawekptak Nov 4, 2025
07920ca
Update sycl/source/detail/event_deps.hpp
slawekptak Nov 4, 2025
43471f1
Fix function args
slawekptak Nov 4, 2025
9e046b5
Fix formatting
slawekptak Nov 4, 2025
6181837
Address review comments
slawekptak Nov 5, 2025
73cdaea
Temp - add asserts
slawekptak Nov 5, 2025
5c8df60
Move assertions
slawekptak Nov 5, 2025
378abb0
Switch asserts
slawekptak Nov 5, 2025
0b996a3
All functions asserts
slawekptak Nov 5, 2025
74b6fab
asserts on properties only
slawekptak Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ void single_task(queue Q, const KernelType &KernelObj,
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task<KernelName>(
std::move(Q), KernelObj, empty_properties_t{}, CodeLoc);
std::move(Q), KernelObj, {}, empty_properties_t{}, CodeLoc);
} else {
submit(
std::move(Q),
Expand Down Expand Up @@ -312,7 +312,7 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
LaunchConfigAccess(Config);

detail::submit_kernel_direct_parallel_for<KernelName>(
std::move(Q), LaunchConfigAccess.getRange(), KernelObj,
std::move(Q), LaunchConfigAccess.getRange(), KernelObj, {},
LaunchConfigAccess.getProperties());
} else {
submit(std::move(Q), [&](handler &CGH) {
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/khr/free_function_commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ void launch_task(const sycl::queue &q, KernelType &&k,
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {
detail::submit_kernel_direct_single_task(
q, std::forward<KernelType>(k),
q, std::forward<KernelType>(k), {},
ext::oneapi::experimental::empty_properties_t{}, codeLoc);
} else {
submit(q, [&](handler &h) { launch_task<KernelType>(h, k); }, codeLoc);
Expand Down
160 changes: 122 additions & 38 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <sycl/nd_range.hpp> // for nd_range
#include <sycl/property_list.hpp> // for property_list
#include <sycl/range.hpp> // for range
#include <sycl/sycl_span.hpp> // for sycl::span

#include <cstddef> // for size_t
#include <functional> // for function
Expand Down Expand Up @@ -68,6 +69,7 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
sycl::span<const event> DepEvents,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

Expand All @@ -76,6 +78,7 @@ void __SYCL_EXPORT submit_kernel_direct_without_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
sycl::span<const event> DepEvents,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

Expand Down Expand Up @@ -165,7 +168,7 @@ template <detail::WrapAs WrapAs, typename LambdaArgType,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct(
const queue &Queue, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
KernelTypeUniversalRef &&KernelFunc, sycl::span<const event> DepEvents,
const PropertiesT &ExtraProps =
ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {
Expand Down Expand Up @@ -230,12 +233,14 @@ auto submit_kernel_direct(

if constexpr (EventNeeded) {
return submit_kernel_direct_with_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents,
ParsedProperties, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
} else {
submit_kernel_direct_without_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents,
ParsedProperties, TlsCodeLocCapture.query(),
TlsCodeLocCapture.isToplevel());
}
}

Expand All @@ -244,7 +249,7 @@ template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct_parallel_for(
const queue &Queue, const nd_range<Dims> &Range,
KernelTypeUniversalRef &&KernelFunc,
KernelTypeUniversalRef &&KernelFunc, sycl::span<const event> DepEvents = {},
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {

Expand All @@ -266,23 +271,25 @@ auto submit_kernel_direct_parallel_for(
return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
KernelName, EventNeeded, PropertiesT,
KernelTypeUniversalRef, Dims>(
Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), Props,
CodeLoc);
Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), DepEvents,
Props, CodeLoc);
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef>
auto submit_kernel_direct_single_task(
const queue &Queue, KernelTypeUniversalRef &&KernelFunc,
sycl::span<const event> DepEvents = {},
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {

return submit_kernel_direct<detail::WrapAs::single_task, void, KernelName,
EventNeeded, PropertiesT, KernelTypeUniversalRef,
1>(
Queue, nd_range<1>{1, 1},
std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
std::forward<KernelTypeUniversalRef>(KernelFunc), DepEvents, Props,
CodeLoc);
}

} // namespace detail
Expand Down Expand Up @@ -2797,12 +2804,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);

assert(false);

// TODO The handler-less path does not support kernel functions
// with the kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {

return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, KernelFunc, Properties, TlsCodeLocCapture.query());
*this, KernelFunc, {}, Properties, TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
Expand Down Expand Up @@ -2852,13 +2862,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
"Use queue.submit() instead");

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());

assert(false);

// TODO The handler-less path does not support kernel functions
// with the kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {

return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, KernelFunc, sycl::span<const event>(&DepEvent, 1), Properties,
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());
}
}

/// single_task version with a kernel represented as a lambda.
Expand Down Expand Up @@ -2903,13 +2926,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
"Use queue.submit() instead");

detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());

assert(false);

// TODO The handler-less path does not support kernel functions
// with the kernel_handler type argument yet.
if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
void>::value)) {

return detail::submit_kernel_direct_single_task<KernelName, true>(
*this, KernelFunc, DepEvents, Properties, TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.template single_task<KernelName, KernelType, PropertiesT>(
Properties, KernelFunc);
},
TlsCodeLocCapture.query());
}
}

/// single_task version with a kernel represented as a lambda.
Expand Down Expand Up @@ -3341,14 +3376,16 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

assert(false);

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {

return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, Range, Rest..., Properties, TlsCodeLocCapture.query());
*this, Range, Rest..., {}, Properties, TlsCodeLocCapture.query());
} else
return submit(
[&](handler &CGH) {
Expand Down Expand Up @@ -3377,7 +3414,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, Range, Rest...,
*this, Range, Rest..., {},
ext::oneapi::experimental::empty_properties_t{},
TlsCodeLocCapture.query());
} else {
Expand Down Expand Up @@ -3410,6 +3447,16 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
PropertiesT Properties, RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

assert(false);

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
}
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
Expand All @@ -3431,12 +3478,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, Range, Rest..., sycl::span<const event>(&DepEvent, 1),
ext::oneapi::experimental::empty_properties_t{},
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvent);
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
}
}

/// parallel_for version with a kernel represented as a lambda + nd_range that
Expand All @@ -3462,6 +3522,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
PropertiesT Properties, RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

assert(false);

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
}

return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
Expand All @@ -3485,12 +3556,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;

// TODO The handler-less path does not support reductions, and
// kernel functions with the kernel_handler type argument yet.
if constexpr (sizeof...(RestT) == 1 &&
!(detail::KernelLambdaHasKernelHandlerArgT<
KernelType, sycl::nd_item<Dims>>::value)) {
return detail::submit_kernel_direct_parallel_for<KernelName, true>(
*this, Range, Rest..., DepEvents,
ext::oneapi::experimental::empty_properties_t{},
TlsCodeLocCapture.query());
} else {
return submit(
[&](handler &CGH) {
CGH.depends_on(DepEvents);
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
}
}

/// Copies data from a memory region pointed to by a placeholder accessor to
Expand Down
Loading
Loading