diff --git a/sycl/include/sycl/detail/nd_range_view.hpp b/sycl/include/sycl/detail/nd_range_view.hpp new file mode 100644 index 0000000000000..ea7f403f171b3 --- /dev/null +++ b/sycl/include/sycl/detail/nd_range_view.hpp @@ -0,0 +1,57 @@ +//==---- nd_range_view.hpp --- SYCL iteration with reference to ranges ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +class NDRDescT; + +// The structure to keep dimension and references to ranges unified for +// all dimensions. +class nd_range_view { + +public: + nd_range_view() = default; + nd_range_view(const nd_range_view &Desc) = default; + nd_range_view(nd_range_view &&Desc) = default; + nd_range_view &operator=(const nd_range_view &Desc) = default; + nd_range_view &operator=(nd_range_view &&Desc) = default; + + template + nd_range_view(sycl::range &N, bool SetNumWorkGroups = false) + : MGlobalSize(&(N[0])), MSetNumWorkGroups(SetNumWorkGroups), + MDims{size_t(Dims_)} {} + + template + nd_range_view(sycl::range &GlobalSize, sycl::id &Offset) + : MGlobalSize(&(GlobalSize[0])), MOffset(&(Offset[0])), + MDims{size_t(Dims_)} {} + + template + nd_range_view(sycl::nd_range &ExecutionRange) + : MGlobalSize(&(ExecutionRange.globalSize[0])), + MLocalSize(&(ExecutionRange.localSize[0])), + MOffset(&(ExecutionRange.offset[0])), MDims{size_t(Dims_)} {} + + sycl::detail::NDRDescT toNDRDescT() const; + + const size_t *MGlobalSize = nullptr; + const size_t *MLocalSize = nullptr; + const size_t *MOffset = nullptr; + bool MSetNumWorkGroups = false; + size_t MDims = 0; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/nd_range.hpp b/sycl/include/sycl/nd_range.hpp index e4ff4881be17a..30816b8a4b354 100644 --- a/sycl/include/sycl/nd_range.hpp +++ b/sycl/include/sycl/nd_range.hpp @@ -15,6 +15,10 @@ namespace sycl { inline namespace _V1 { +namespace detail { +class nd_range_view; +} + /// Defines the iteration domain of both the work-groups and the overall /// dispatch. /// @@ -65,6 +69,8 @@ template class nd_range { bool operator!=(const nd_range &rhs) const { return !(*this == rhs); } + + friend class sycl::_V1::detail::nd_range_view; }; } // namespace _V1 diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index f7bff57c2df9a..b220f33aafa22 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -21,6 +21,7 @@ #include // for checkValueRange #include // for is_queue_info_... #include // for KernelInfo +#include #include #include // for OwnerLessBase #include // for device @@ -64,18 +65,16 @@ template auto get_native(const SyclObjectT &Obj) -> backend_return_t; -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc); -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, @@ -167,7 +166,7 @@ template auto submit_kernel_direct( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents, const PropertiesT &ExtraProps = ext::oneapi::experimental::empty_properties_t{}, @@ -233,12 +232,12 @@ auto submit_kernel_direct( if constexpr (EventNeeded) { return submit_kernel_direct_with_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents, + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, ParsedProperties, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } else { submit_kernel_direct_without_event_impl( - Queue, Range, HostKernel, DeviceKernelInfoPtr, DepEvents, + Queue, RangeView, HostKernel, DeviceKernelInfoPtr, DepEvents, ParsedProperties, TlsCodeLocCapture.query(), TlsCodeLocCapture.isToplevel()); } @@ -248,7 +247,7 @@ template auto submit_kernel_direct_parallel_for( - const queue &Queue, const nd_range &Range, + const queue &Queue, nd_range Range, KernelTypeUniversalRef &&KernelFunc, sycl::span DepEvents = {}, const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t{}, const detail::code_location &CodeLoc = detail::code_location::current()) { @@ -271,8 +270,9 @@ auto submit_kernel_direct_parallel_for( return submit_kernel_direct( - Queue, Range, std::forward(KernelFunc), DepEvents, - Props, CodeLoc); + Queue, detail::nd_range_view(Range), + std::forward(KernelFunc), DepEvents, Props, + CodeLoc); } template ( - Queue, nd_range<1>{1, 1}, + Queue, detail::nd_range_view(), std::forward(KernelFunc), DepEvents, Props, CodeLoc); } diff --git a/sycl/source/detail/ndrange_desc.hpp b/sycl/source/detail/ndrange_desc.hpp index 30108d729db31..5426f2e09d43f 100644 --- a/sycl/source/detail/ndrange_desc.hpp +++ b/sycl/source/detail/ndrange_desc.hpp @@ -32,51 +32,62 @@ class NDRDescT { NDRDescT(const NDRDescT &Desc) = default; NDRDescT(NDRDescT &&Desc) = default; - template - NDRDescT(sycl::range N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} { + NDRDescT(const size_t *N, bool SetNumWorkGroups, int DimsVal) + : Dims{size_t(DimsVal)} { if (SetNumWorkGroups) { - for (size_t I = 0; I < Dims_; ++I) { + for (size_t I = 0; I < Dims; ++I) { NumWorkGroups[I] = N[I]; } } else { - for (size_t I = 0; I < Dims_; ++I) { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = N[I]; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { GlobalSize[I] = 1; } } } template - NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, - sycl::id Offset) - : Dims{size_t(Dims_)} { - for (size_t I = 0; I < Dims_; ++I) { + NDRDescT(sycl::range N, bool SetNumWorkGroups) + : NDRDescT(&(N[0]), SetNumWorkGroups, Dims_) {} + + NDRDescT(const size_t *NumWorkItems, const size_t *LocalSizes, + const size_t *Offset, int DimsVal) + : Dims{size_t(DimsVal)} { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = NumWorkItems[I]; LocalSize[I] = LocalSizes[I]; GlobalOffset[I] = Offset[I]; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { LocalSize[I] = LocalSizes[0] ? 1 : 0; } - for (int I = Dims_; I < 3; ++I) { + for (int I = Dims; I < 3; ++I) { GlobalSize[I] = 1; } } template - NDRDescT(sycl::range NumWorkItems, sycl::id Offset) - : Dims{size_t(Dims_)} { - for (size_t I = 0; I < Dims_; ++I) { + NDRDescT(sycl::range NumWorkItems, sycl::range LocalSizes, + sycl::id Offset) + : NDRDescT(&(NumWorkItems[0]), &(LocalSizes[0]), &(Offset[0]), Dims_) {} + + NDRDescT(const size_t *NumWorkItems, const size_t *Offset, int DimsVal) + : Dims{size_t(DimsVal)} { + for (size_t I = 0; I < Dims; ++I) { GlobalSize[I] = NumWorkItems[I]; GlobalOffset[I] = Offset[I]; } } + template + NDRDescT(sycl::range NumWorkItems, sycl::id Offset) + : NDRDescT(&(NumWorkItems[0]), &(Offset[0]), Dims_) {} + template NDRDescT(sycl::nd_range ExecutionRange) : NDRDescT(ExecutionRange.get_global_range(), diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5b7bfb5e90fae..980fcc65bfc40 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -126,6 +127,18 @@ prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) { return detail::createSyclObjFromImpl(EventImpl); } +sycl::detail::NDRDescT nd_range_view::toNDRDescT() const { + if (!MGlobalSize) { + return NDRDescT(nd_range<1>{1, 1}); + } else if (MLocalSize) { + return NDRDescT(MGlobalSize, MLocalSize, MOffset, MDims); + } else if (MOffset) { + return NDRDescT(MGlobalSize, MOffset, MDims); + } else { + return NDRDescT(MGlobalSize, MSetNumWorkGroups, MDims); + } +} + const std::vector & queue_impl::getExtendDependencyList(const std::vector &DepEvents, std::vector &MutableVec, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7c793b619ecab..b44a5bbfc3ea7 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -360,29 +360,27 @@ class queue_impl : public std::enable_shared_from_this { return createSyclObjFromImpl(ResEvent); } - template event submit_kernel_direct_with_event( - const nd_range &Range, detail::HostKernelRefBase &HostKernel, + detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::EventImplPtr EventImpl = submit_kernel_direct_impl( - NDRDescT{Range}, HostKernel, DeviceKernelInfo, + RangeView.toNDRDescT(), HostKernel, DeviceKernelInfo, /*CallerNeedsEvent*/ true, DepEvents, Props, CodeLoc, IsTopCodeLoc); return createSyclObjFromImpl(EventImpl); } - template void submit_kernel_direct_without_event( - const nd_range &Range, detail::HostKernelRefBase &HostKernel, + detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - submit_kernel_direct_impl(NDRDescT{Range}, HostKernel, DeviceKernelInfo, - /*CallerNeedsEvent*/ false, DepEvents, Props, - CodeLoc, IsTopCodeLoc); + submit_kernel_direct_impl( + RangeView.toNDRDescT(), HostKernel, DeviceKernelInfo, + /*CallerNeedsEvent*/ false, DepEvents, Props, CodeLoc, IsTopCodeLoc); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index a58b4b234ab3a..e855cf37ef1eb 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -463,80 +463,30 @@ void queue::ext_oneapi_set_external_event(const event &external_event) { const property_list &queue::getPropList() const { return impl->getPropList(); } -template event submit_kernel_direct_with_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { return getSyclObjImpl(Queue)->submit_kernel_direct_with_event( - Range, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, + RangeView, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, IsTopCodeLoc); } -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<1>( - const queue &Queue, const nd_range<1> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<2>( - const queue &Queue, const nd_range<2> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template event __SYCL_EXPORT submit_kernel_direct_with_event_impl<3>( - const queue &Queue, const nd_range<3> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void submit_kernel_direct_without_event_impl( - const queue &Queue, const nd_range &Range, + const queue &Queue, detail::nd_range_view RangeView, detail::HostKernelRefBase &HostKernel, detail::DeviceKernelInfo *DeviceKernelInfo, sycl::span DepEvents, const detail::KernelPropertyHolderStructTy &Props, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { getSyclObjImpl(Queue)->submit_kernel_direct_without_event( - Range, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, + RangeView, HostKernel, DeviceKernelInfo, DepEvents, Props, CodeLoc, IsTopCodeLoc); } -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<1>( - const queue &Queue, const nd_range<1> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<2>( - const queue &Queue, const nd_range<2> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - -template void __SYCL_EXPORT submit_kernel_direct_without_event_impl<3>( - const queue &Queue, const nd_range<3> &Range, - detail::HostKernelRefBase &HostKernel, - detail::DeviceKernelInfo *DeviceKernelInfo, - sycl::span DepEvents, - const detail::KernelPropertyHolderStructTy &Props, - const detail::code_location &CodeLoc, bool IsTopCodeLoc); - } // namespace _V1 } // namespace sycl diff --git a/sycl/test/abi/layout_nd_range_view.cpp b/sycl/test/abi/layout_nd_range_view.cpp new file mode 100644 index 0000000000000..2a1d0693dab26 --- /dev/null +++ b/sycl/test/abi/layout_nd_range_view.cpp @@ -0,0 +1,19 @@ +// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s +// REQUIRES: linux +// UNSUPPORTED: libcxx + +// clang-format off + +#include + + +SYCL_EXTERNAL void nd_range_view(sycl::detail::nd_range_view) {} +// CHECK: 0 | class sycl::detail::nd_range_view +// CHECK-NEXT: 0 | const size_t * MGlobalSize +// CHECK-NEXT: 8 | const size_t * MLocalSize +// CHECK-NEXT: 16 | const size_t * MOffset +// CHECK-NEXT: 24 | _Bool MSetNumWorkGroups +// CHECK-NEXT: 32 | size_t MDims +// CHECK-NEXT: | [sizeof=40, dsize=40, align=8, +// CHECK-NEXT: | nvsize=40, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e42759431d374..f7a32763da179 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2985,12 +2985,8 @@ _ZN4sycl3_V121__isgreaterequal_implEdd _ZN4sycl3_V121__isgreaterequal_implEff _ZN4sycl3_V122accelerator_selector_vERKNS0_6deviceE _ZN4sycl3_V128verifyUSMAllocatorPropertiesERKNS0_13property_listE -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi1EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi2EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V136submit_kernel_direct_with_event_implILi3EEENS0_5eventERKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNSA_16DeviceKernelInfoENS0_4spanIKS2_Lm18446744073709551615EEERKNSA_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNSA_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi1EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi2EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb -_ZN4sycl3_V139submit_kernel_direct_without_event_implILi3EEEvRKNS0_5queueERKNS0_8nd_rangeIXT_EEERNS0_6detail17HostKernelRefBaseEPNS9_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS9_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSK_5intel12experimental12cache_configENSM_17use_root_sync_keyENSM_23work_group_progress_keyENSM_22sub_group_progress_keyENSM_22work_item_progress_keyENSM_4cuda12cluster_sizeILi1EEENSW_ILi2EEENSW_ILi3EEEEEERKNS9_13code_locationEb +_ZN4sycl3_V136submit_kernel_direct_with_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb +_ZN4sycl3_V139submit_kernel_direct_without_event_implERKNS0_5queueENS0_6detail13nd_range_viewERNS4_17HostKernelRefBaseEPNS4_16DeviceKernelInfoENS0_4spanIKNS0_5eventELm18446744073709551615EEERKNS4_27kernel_launch_properties_v111PropsHolderIJNS0_3ext6oneapi12experimental23work_group_scratch_sizeENSG_5intel12experimental12cache_configENSI_17use_root_sync_keyENSI_23work_group_progress_keyENSI_22sub_group_progress_keyENSI_22work_item_progress_keyENSI_4cuda12cluster_sizeILi1EEENSS_ILi2EEENSS_ILi3EEEEEERKNS4_13code_locationEb _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 1db98f1b5cf9d..49387922d3fbc 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -286,12 +286,6 @@ ??$is_image_handle_supported@Usampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVdevice@34@AEBVcontext@34@@Z ??$is_image_handle_supported@Uunsampled_image_handle@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA_NAEBUimage_descriptor@01234@W4image_memory_handle_type@01234@AEBVqueue@34@@Z -??$submit_kernel_direct_with_event_impl@$00@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$01@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_with_event_impl@$02@_V1@sycl@@YA?AVevent@01@AEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@601@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@601@AEBUcode_location@601@_N@Z -??$submit_kernel_direct_without_event_impl@$00@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$00@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$01@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$01@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z -??$submit_kernel_direct_without_event_impl@$02@_V1@sycl@@YAXAEBVqueue@01@AEBV?$nd_range@$02@01@AEAVHostKernelRefBase@detail@01@PEAVDeviceKernelInfo@501@V?$span@$$CBVevent@_V1@sycl@@$0?0@01@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@501@AEBUcode_location@501@_N@Z ??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z ??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z ??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z @@ -4484,6 +4478,8 @@ ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z +?submit_kernel_direct_with_event_impl@_V1@sycl@@YA?AVevent@12@AEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@612@PEAVDeviceKernelInfo@612@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@612@AEBUcode_location@612@_N@Z +?submit_kernel_direct_without_event_impl@_V1@sycl@@YAXAEBVqueue@12@Vnd_range_view@detail@12@AEAVHostKernelRefBase@512@PEAVDeviceKernelInfo@512@V?$span@$$CBVevent@_V1@sycl@@$0?0@12@AEBU?$PropsHolder@Uwork_group_scratch_size@experimental@oneapi@ext@_V1@sycl@@Ucache_config@2intel@456@Uuse_root_sync_key@23456@Uwork_group_progress_key@23456@Usub_group_progress_key@23456@Uwork_item_progress_key@23456@U?$cluster_size@$00@cuda@23456@U?$cluster_size@$01@cuda@23456@U?$cluster_size@$02@cuda@23456@@kernel_launch_properties_v1@512@AEBUcode_location@512@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@AEBVtype_erased_cgfo_ty@detail@23@AEBVSubmissionInfo@v1@623@AEBUcode_location@623@_N@Z ?submit_with_event_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBVSubmissionInfo@detail@23@AEBUcode_location@823@_N@Z diff --git a/sycl/test/abi/symbol_size_alignment.cpp b/sycl/test/abi/symbol_size_alignment.cpp index 63286da1b9786..36bf933ce6b28 100644 --- a/sycl/test/abi/symbol_size_alignment.cpp +++ b/sycl/test/abi/symbol_size_alignment.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -58,6 +59,7 @@ int main() { #endif check, 16, 8>(); check(); + check(); check(); #ifdef __SYCL_DEVICE_ONLY__ check, 4, 4>(); diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 5174c8a29bb6b..a0bc25739c465 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -98,6 +98,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: device.hpp // CHECK-NEXT: detail/string_view.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp index 76570a99bdda7..9cd74ac24ca78 100644 --- a/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_queue.hpp.cpp @@ -102,6 +102,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: device.hpp // CHECK-NEXT: detail/string_view.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index 50abdf954cca0..65e5d95389e72 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -180,6 +180,7 @@ // CHECK-NEXT: kernel.hpp // CHECK-NEXT: sampler.hpp // CHECK-NEXT: queue.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp // CHECK-NEXT: sycl_span.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp index f2feef5bd9871..5534c9b9fe6ee 100644 --- a/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_usm.hpp.cpp @@ -122,6 +122,7 @@ // CHECK-NEXT: nd_item.hpp // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp +// CHECK-NEXT: detail/nd_range_view.hpp // CHECK-NEXT: detail/optional.hpp // CHECK-NEXT: event.hpp // CHECK-NEXT: exception_list.hpp diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index afc0e185eb7c0..9041793ecdaf2 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -22,4 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT AccessorDefaultCtor.cpp HostTaskAndBarrier.cpp BarrierDependencies.cpp + NdRangeViewUsage.cpp ) diff --git a/sycl/unittests/scheduler/NdRangeViewUsage.cpp b/sycl/unittests/scheduler/NdRangeViewUsage.cpp new file mode 100644 index 0000000000000..dd1e0659cb607 --- /dev/null +++ b/sycl/unittests/scheduler/NdRangeViewUsage.cpp @@ -0,0 +1,118 @@ +//==---- NdRangeViewUsage.cpp --- Check nd_range_view ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +#include + +template +void TestNdRangeView(sycl::range global, sycl::range local, + sycl::id offset) { + { + sycl::nd_range nd_range{global, local, offset}; + sycl::detail::nd_range_view r{nd_range}; + ASSERT_EQ(r.MDims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MLocalSize[d], local[d]); + ASSERT_EQ(r.MOffset[d], offset[d]); + } + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + ASSERT_EQ(NDRDesc.LocalSize[d], local[d]); + ASSERT_EQ(NDRDesc.GlobalOffset[d], offset[d]); + } + + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 1UL); + ASSERT_EQ(NDRDesc.LocalSize[d], 1UL); + } + } + { + sycl::detail::nd_range_view r{global, offset}; + ASSERT_EQ(r.MDims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MOffset[d], offset[d]); + } + ASSERT_EQ(r.MLocalSize, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + ASSERT_EQ(NDRDesc.GlobalOffset[d], offset[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } + for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + } + } + { + sycl::detail::nd_range_view r{global, true}; + ASSERT_EQ(r.MDims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MSetNumWorkGroups, true); + } + ASSERT_EQ(r.MLocalSize, nullptr); + ASSERT_EQ(r.MOffset, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.NumWorkGroups[d], global[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.NumWorkGroups[d], 0UL); + } + for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalSize[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } + } + { + sycl::detail::nd_range_view r{global, false}; + ASSERT_EQ(r.MDims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(r.MGlobalSize[d], global[d]); + ASSERT_EQ(r.MSetNumWorkGroups, false); + } + ASSERT_EQ(r.MLocalSize, nullptr); + ASSERT_EQ(r.MOffset, nullptr); + + sycl::detail::NDRDescT NDRDesc = r.toNDRDescT(); + ASSERT_EQ(NDRDesc.Dims, size_t{dims}); + for (int d = 0; d < dims; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], global[d]); + } + for (int d = dims; d < 3; d++) { + ASSERT_EQ(NDRDesc.GlobalSize[d], 1UL); + } + for (int d = 0; d < 3; d++) { + ASSERT_EQ(NDRDesc.LocalSize[d], 0UL); + ASSERT_EQ(NDRDesc.NumWorkGroups[d], 0UL); + ASSERT_EQ(NDRDesc.GlobalOffset[d], 0UL); + } + } +} + +TEST(RangesRefUsage, RangesRefUsage) { + TestNdRangeView(sycl::range<1>{1024}, sycl::range<1>{64}, sycl::id<1>{10}); + TestNdRangeView(sycl::range<2>{1024, 512}, sycl::range<2>{64, 32}, + sycl::id<2>{10, 5}); + TestNdRangeView(sycl::range<3>{1024, 512, 256}, sycl::range<3>{64, 32, 16}, + sycl::id<3>{10, 5, 2}); +}