Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9571fec
[SYCL] Add structure to pass references to ranges
Alexandr-Konovalov Aug 14, 2025
2521805
Fix formatting.
Alexandr-Konovalov Aug 14, 2025
1a90e48
Fix const.
Alexandr-Konovalov Aug 14, 2025
38945f3
Rename RangesRefT to ranges_ref_view and move to sycl::detail namespace.
Alexandr-Konovalov Aug 18, 2025
d4c267e
Fix formatting.
Alexandr-Konovalov Aug 18, 2025
0d76be9
Move assignment near ctors.
Alexandr-Konovalov Aug 19, 2025
858bf21
Decrease dumpilcation is the test.
Alexandr-Konovalov Aug 19, 2025
95beb6a
Fix formatting.
Alexandr-Konovalov Aug 19, 2025
f2e347d
Fix formatting.
Alexandr-Konovalov Aug 19, 2025
71e2eee
Add export to sycl::detail::NDRDescT.
Alexandr-Konovalov Aug 22, 2025
7a3f269
Fix formatting.
Alexandr-Konovalov Aug 22, 2025
e6ab5e7
Merge branch 'sycl' into Alexandr-Konovalov/ref-ndrange
Alexandr-Konovalov Aug 25, 2025
d309440
Rename sycl::detail::ranges_ref_view to nd_range_view.
Alexandr-Konovalov Aug 27, 2025
c7bc868
Fix formatting.
Alexandr-Konovalov Aug 27, 2025
2754978
Merge branch 'sycl' into Alexandr-Konovalov/ref-ndrange
Alexandr-Konovalov Sep 8, 2025
d635399
Merge branch 'sycl' into Alexandr-Konovalov/ref-ndrange
Alexandr-Konovalov Sep 8, 2025
063997e
Merge branch 'sycl' into nd_range_view_struct_only
slawekptak Oct 30, 2025
a3ae6c0
Update the nd_range_view and NDRDescT conversion
slawekptak Oct 31, 2025
fbebaef
Merge branch 'sycl' into nd_range_view_struct_and_no_handler
slawekptak Oct 31, 2025
ce2a223
Merge branch 'sycl' into nd_range_view_struct_and_no_handler
slawekptak Nov 3, 2025
8c970ab
Update Linux symbols
slawekptak Nov 3, 2025
8a434a1
Update the include deps tests
slawekptak Nov 3, 2025
fa35c99
Add nd_range_view layout tests
slawekptak Nov 3, 2025
b4184b4
Move the nd_range_view layout test to a separate file
slawekptak Nov 3, 2025
1677e6d
Merge branch 'sycl' into nd_range_view_struct_and_no_handler
slawekptak Nov 4, 2025
fa083a4
Update Linux symbols
slawekptak Nov 4, 2025
8227e9a
Update Windows symbols
slawekptak Nov 4, 2025
df435a0
Fix formatting
slawekptak Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions sycl/include/sycl/detail/nd_range_view.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//==---- nd_range_view.hpp --- SYCL iteration with reference to ranges ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/nd_range.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

class NDRDescT;

// The structure to keep dimension and references to ranges unified for
// all dimensions.
class nd_range_view {

public:
nd_range_view() = default;
nd_range_view(const nd_range_view &Desc) = default;
nd_range_view(nd_range_view &&Desc) = default;
nd_range_view &operator=(const nd_range_view &Desc) = default;
nd_range_view &operator=(nd_range_view &&Desc) = default;

template <int Dims_>
nd_range_view(sycl::range<Dims_> &N, bool SetNumWorkGroups = false)
: MGlobalSize(&(N[0])), MSetNumWorkGroups(SetNumWorkGroups),
MDims{size_t(Dims_)} {}

template <int Dims_>
nd_range_view(sycl::range<Dims_> &GlobalSize, sycl::id<Dims_> &Offset)
: MGlobalSize(&(GlobalSize[0])), MOffset(&(Offset[0])),
MDims{size_t(Dims_)} {}

template <int Dims_>
nd_range_view(sycl::nd_range<Dims_> &ExecutionRange)
: MGlobalSize(&(ExecutionRange.globalSize[0])),
MLocalSize(&(ExecutionRange.localSize[0])),
MOffset(&(ExecutionRange.offset[0])), MDims{size_t(Dims_)} {}

sycl::detail::NDRDescT toNDRDescT() const;

const size_t *MGlobalSize = nullptr;
const size_t *MLocalSize = nullptr;
const size_t *MOffset = nullptr;
bool MSetNumWorkGroups = false;
size_t MDims = 0;
};

} // namespace detail
} // namespace _V1
} // namespace sycl
6 changes: 6 additions & 0 deletions sycl/include/sycl/nd_range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
namespace sycl {
inline namespace _V1 {

namespace detail {
class nd_range_view;
}

/// Defines the iteration domain of both the work-groups and the overall
/// dispatch.
///
Expand Down Expand Up @@ -65,6 +69,8 @@ template <int Dimensions = 1> class nd_range {
bool operator!=(const nd_range<Dimensions> &rhs) const {
return !(*this == rhs);
}

friend class sycl::_V1::detail::nd_range_view;
};

} // namespace _V1
Expand Down
21 changes: 10 additions & 11 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <sycl/detail/id_queries_fit_in_int.hpp> // for checkValueRange
#include <sycl/detail/info_desc_helpers.hpp> // for is_queue_info_...
#include <sycl/detail/kernel_desc.hpp> // for KernelInfo
#include <sycl/detail/nd_range_view.hpp> // for nd_range_view
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove the comment because it is useless. The nd_range_view.hpp name is self-descriptive.

#include <sycl/detail/optional.hpp>
#include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
#include <sycl/device.hpp> // for device
Expand Down Expand Up @@ -63,17 +64,15 @@ template <backend BackendName, class SyclObjectT>
auto get_native(const SyclObjectT &Obj)
-> backend_return_t<BackendName, SyclObjectT>;

template <int Dims>
event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, detail::nd_range_view RangeView,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template <int Dims>
void __SYCL_EXPORT submit_kernel_direct_without_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, detail::nd_range_view RangeView,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
Expand Down Expand Up @@ -164,7 +163,7 @@ template <detail::WrapAs WrapAs, typename LambdaArgType,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, detail::nd_range_view RangeView,
KernelTypeUniversalRef &&KernelFunc,
const PropertiesT &ExtraProps =
ext::oneapi::experimental::empty_properties_t{},
Expand Down Expand Up @@ -230,11 +229,11 @@ auto submit_kernel_direct(

if constexpr (EventNeeded) {
return submit_kernel_direct_with_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
Queue, RangeView, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
} else {
submit_kernel_direct_without_event_impl(
Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
Queue, RangeView, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel());
}
}
Expand All @@ -243,7 +242,7 @@ template <typename KernelName = detail::auto_name, bool EventNeeded = false,
typename PropertiesT = ext::oneapi::experimental::empty_properties_t,
typename KernelTypeUniversalRef, int Dims>
auto submit_kernel_direct_parallel_for(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, nd_range<Dims> Range,
KernelTypeUniversalRef &&KernelFunc,
const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{},
const detail::code_location &CodeLoc = detail::code_location::current()) {
Expand All @@ -266,8 +265,8 @@ auto submit_kernel_direct_parallel_for(
return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
KernelName, EventNeeded, PropertiesT,
KernelTypeUniversalRef, Dims>(
Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), Props,
CodeLoc);
Queue, detail::nd_range_view(Range),
std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
}

template <typename KernelName = detail::auto_name, bool EventNeeded = false,
Expand All @@ -281,7 +280,7 @@ auto submit_kernel_direct_single_task(
return submit_kernel_direct<detail::WrapAs::single_task, void, KernelName,
EventNeeded, PropertiesT, KernelTypeUniversalRef,
1>(
Queue, nd_range<1>{1, 1},
Queue, detail::nd_range_view(),
std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
}

Expand Down
39 changes: 25 additions & 14 deletions sycl/source/detail/ndrange_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,51 +32,62 @@ class NDRDescT {
NDRDescT(const NDRDescT &Desc) = default;
NDRDescT(NDRDescT &&Desc) = default;

template <int Dims_>
NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} {
NDRDescT(const size_t *N, bool SetNumWorkGroups, int DimsVal)
: Dims{size_t(DimsVal)} {
if (SetNumWorkGroups) {
for (size_t I = 0; I < Dims_; ++I) {
for (size_t I = 0; I < Dims; ++I) {
NumWorkGroups[I] = N[I];
}
} else {
for (size_t I = 0; I < Dims_; ++I) {
for (size_t I = 0; I < Dims; ++I) {
GlobalSize[I] = N[I];
}

for (int I = Dims_; I < 3; ++I) {
for (int I = Dims; I < 3; ++I) {
GlobalSize[I] = 1;
}
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups)
: NDRDescT(&(N[0]), SetNumWorkGroups, Dims_) {}

NDRDescT(const size_t *NumWorkItems, const size_t *LocalSizes,
const size_t *Offset, int DimsVal)
: Dims{size_t(DimsVal)} {
for (size_t I = 0; I < Dims; ++I) {
GlobalSize[I] = NumWorkItems[I];
LocalSize[I] = LocalSizes[I];
GlobalOffset[I] = Offset[I];
}

for (int I = Dims_; I < 3; ++I) {
for (int I = Dims; I < 3; ++I) {
LocalSize[I] = LocalSizes[0] ? 1 : 0;
}

for (int I = Dims_; I < 3; ++I) {
for (int I = Dims; I < 3; ++I) {
GlobalSize[I] = 1;
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
: Dims{size_t(Dims_)} {
for (size_t I = 0; I < Dims_; ++I) {
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
sycl::id<Dims_> Offset)
: NDRDescT(&(NumWorkItems[0]), &(LocalSizes[0]), &(Offset[0]), Dims_) {}

NDRDescT(const size_t *NumWorkItems, const size_t *Offset, int DimsVal)
: Dims{size_t(DimsVal)} {
for (size_t I = 0; I < Dims; ++I) {
GlobalSize[I] = NumWorkItems[I];
GlobalOffset[I] = Offset[I];
}
}

template <int Dims_>
NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
: NDRDescT(&(NumWorkItems[0]), &(Offset[0]), Dims_) {}

template <int Dims_>
NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
: NDRDescT(ExecutionRange.get_global_range(),
Expand Down
13 changes: 13 additions & 0 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <detail/queue_impl.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/nd_range_view.hpp>
#include <sycl/detail/ur.hpp>
#include <sycl/device.hpp>

Expand Down Expand Up @@ -125,6 +126,18 @@ prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) {
return detail::createSyclObjFromImpl<event>(EventImpl);
}

sycl::detail::NDRDescT nd_range_view::toNDRDescT() const {
if (!MGlobalSize) {
return NDRDescT(nd_range<1>{1, 1});
} else if (MLocalSize) {
return NDRDescT(MGlobalSize, MLocalSize, MOffset, MDims);
} else if (MOffset) {
return NDRDescT(MGlobalSize, MOffset, MDims);
} else {
return NDRDescT(MGlobalSize, MSetNumWorkGroups, MDims);
}
}

const std::vector<event> &
queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
std::vector<event> &MutableVec,
Expand Down
17 changes: 8 additions & 9 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,26 +360,25 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
return createSyclObjFromImpl<event>(ResEvent);
}

template <int Dims>
event submit_kernel_direct_with_event(
const nd_range<Dims> &Range, detail::HostKernelRefBase &HostKernel,
detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
detail::EventImplPtr EventImpl =
submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo,
true, Props, CodeLoc, IsTopCodeLoc);
detail::EventImplPtr EventImpl = submit_kernel_direct_impl(
RangeView.toNDRDescT(), HostKernel, DeviceKernelInfo, true, Props,
CodeLoc, IsTopCodeLoc);
return createSyclObjFromImpl<event>(EventImpl);
}

template <int Dims>
void submit_kernel_direct_without_event(
const nd_range<Dims> &Range, detail::HostKernelRefBase &HostKernel,
detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo,
false, Props, CodeLoc, IsTopCodeLoc);
submit_kernel_direct_impl(RangeView.toNDRDescT(), HostKernel,
DeviceKernelInfo, false, Props, CodeLoc,
IsTopCodeLoc);
}

void submit_without_event(const detail::type_erased_cgfo_ty &CGF,
Expand Down
52 changes: 4 additions & 48 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,70 +471,26 @@ void queue::ext_oneapi_set_external_event(const event &external_event) {

const property_list &queue::getPropList() const { return impl->getPropList(); }

template <int Dims>
event submit_kernel_direct_with_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, detail::nd_range_view RangeView,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
return getSyclObjImpl(Queue)->submit_kernel_direct_with_event(
Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc);
RangeView, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc);
}

template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>(
const queue &Queue, const nd_range<1> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>(
const queue &Queue, const nd_range<2> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>(
const queue &Queue, const nd_range<3> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template <int Dims>
void submit_kernel_direct_without_event_impl(
const queue &Queue, const nd_range<Dims> &Range,
const queue &Queue, detail::nd_range_view RangeView,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
getSyclObjImpl(Queue)->submit_kernel_direct_without_event(
Range, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc);
RangeView, HostKernel, DeviceKernelInfo, Props, CodeLoc, IsTopCodeLoc);
}

template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>(
const queue &Queue, const nd_range<1> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>(
const queue &Queue, const nd_range<2> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>(
const queue &Queue, const nd_range<3> &Range,
detail::HostKernelRefBase &HostKernel,
detail::DeviceKernelInfo *DeviceKernelInfo,
const detail::KernelPropertyHolderStructTy &Props,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);

} // namespace _V1
} // namespace sycl

Expand Down
19 changes: 19 additions & 0 deletions sycl/test/abi/layout_nd_range_view.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s
// RUN: %clangxx -fsycl -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s
// REQUIRES: linux
// UNSUPPORTED: libcxx

// clang-format off

#include <sycl/detail/nd_range_view.hpp>


SYCL_EXTERNAL void nd_range_view(sycl::detail::nd_range_view) {}
// CHECK: 0 | class sycl::detail::nd_range_view
// CHECK-NEXT: 0 | const size_t * MGlobalSize
// CHECK-NEXT: 8 | const size_t * MLocalSize
// CHECK-NEXT: 16 | const size_t * MOffset
// CHECK-NEXT: 24 | _Bool MSetNumWorkGroups
// CHECK-NEXT: 32 | size_t MDims
// CHECK-NEXT: | [sizeof=40, dsize=40, align=8,
// CHECK-NEXT: | nvsize=40, nvalign=8]
Loading