Skip to content

Commit 0fa4d7c

Browse files
committed
Do not call addPendingMemoryAllocation() in appendKernelLaunchWithArgsExpNew()
Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
1 parent e05e82b commit 0fa4d7c

File tree

4 files changed

+59
-49
lines changed

4 files changed

+59
-49
lines changed

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -154,9 +154,9 @@ ur_command_list_manager::getSignalEvent(ur_event_handle_t hUserEvent,
154154
ur_result_t ur_command_list_manager::appendKernelLaunchLocked(
155155
ur_kernel_handle_t hKernel, ze_kernel_handle_t hZeKernel, uint32_t workDim,
156156
const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
157-
const size_t *pLocalWorkSize, uint32_t numEventsInWaitList,
158-
const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent,
159-
bool cooperative, std::vector<void *> *pKMemObj, void *pNext) {
157+
const size_t *pLocalWorkSize, wait_list_view &waitListView,
158+
ur_event_handle_t phEvent, bool cooperative, std::vector<void *> *pKMemObj,
159+
void *pNext) {
160160

161161
ze_group_count_t zeThreadGroupDimensions{1, 1, 1};
162162
uint32_t WG[3]{};
@@ -165,11 +165,10 @@ ur_result_t ur_command_list_manager::appendKernelLaunchLocked(
165165
pGlobalWorkSize, pLocalWorkSize));
166166

167167
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH);
168-
auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);
169168

170169
UR_CALL(hKernel->prepareForSubmission(
171170
hContext.get(), hDevice.get(), pGlobalWorkOffset, workDim, WG[0], WG[1],
172-
WG[2], getZeCommandList(), waitListView, pKMemObj));
171+
WG[2], getZeCommandList(), waitListView));
173172

174173
if (pKMemObj) {
175174
// zeCommandListAppendLaunchKernelWithArguments
@@ -231,11 +230,13 @@ ur_result_t ur_command_list_manager::appendKernelLaunchUnlocked(
231230

232231
std::scoped_lock<ur_shared_mutex> Lock(hKernel->Mutex);
233232

233+
wait_list_view waitListView =
234+
getWaitListView(phEventWaitList, numEventsInWaitList);
235+
234236
// last arguments: pKMemObj == nullptr and pNext == nullptr
235-
return appendKernelLaunchLocked(hKernel, hZeKernel, workDim,
236-
pGlobalWorkOffset, pGlobalWorkSize,
237-
pLocalWorkSize, numEventsInWaitList,
238-
phEventWaitList, phEvent, cooperative);
237+
return appendKernelLaunchLocked(
238+
hKernel, hZeKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
239+
pLocalWorkSize, waitListView, phEvent, cooperative);
239240
}
240241

241242
ur_result_t ur_command_list_manager::appendKernelLaunch(
@@ -1164,6 +1165,9 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11641165
hKernel->kernelMemObj.resize(numArgs, 0);
11651166
hKernel->kernelArgs.resize(numArgs, 0);
11661167

1168+
wait_list_view waitListView =
1169+
getWaitListView(phEventWaitList, numEventsInWaitList);
1170+
11671171
for (uint32_t argIndex = 0; argIndex < numArgs; argIndex++) {
11681172
switch (pArgs[argIndex].type) {
11691173
case UR_EXP_KERNEL_ARG_TYPE_LOCAL:
@@ -1176,12 +1180,13 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11761180
hKernel->kernelArgs[argIndex] = (void *)&pArgs[argIndex].value.pointer;
11771181
break;
11781182
case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ:
1179-
// prepareForSubmission() will save zePtr in kernelMemObj[argIndex]
1183+
// compute zePtr for the given memory handle and store it in
1184+
// hKernel->kernelMemObj[argIndex]
1185+
UR_CALL(hKernel->computeZePtr(
1186+
pArgs[argIndex].value.memObjTuple.hMem, hDevice.get(),
1187+
ur_mem_buffer_t::device_access_mode_t::read_write, getZeCommandList(),
1188+
waitListView, &hKernel->kernelMemObj[argIndex]));
11801189
hKernel->kernelArgs[argIndex] = &hKernel->kernelMemObj[argIndex];
1181-
UR_CALL(hKernel->addPendingMemoryAllocation(
1182-
{pArgs[argIndex].value.memObjTuple.hMem,
1183-
ur_mem_buffer_t::device_access_mode_t::read_write,
1184-
pArgs[argIndex].index}));
11851190
break;
11861191
case UR_EXP_KERNEL_ARG_TYPE_SAMPLER:
11871192
hKernel->kernelArgs[argIndex] = &pArgs[argIndex].value.sampler->ZeSampler;
@@ -1193,8 +1198,8 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11931198

11941199
return appendKernelLaunchLocked(
11951200
hKernel, hZeKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
1196-
pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent,
1197-
cooperativeKernelLaunchRequested, &hKernel->kernelMemObj, pNext);
1201+
pLocalWorkSize, waitListView, phEvent, cooperativeKernelLaunchRequested,
1202+
&hKernel->kernelMemObj, pNext);
11981203
}
11991204

12001205
ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExp(

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -280,8 +280,7 @@ struct ur_command_list_manager {
280280
ur_kernel_handle_t hKernel, ze_kernel_handle_t hZeKernel,
281281
uint32_t workDim, const size_t *pGlobalWorkOffset,
282282
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
283-
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
284-
ur_event_handle_t phEvent, bool cooperative,
283+
wait_list_view &waitListView, ur_event_handle_t phEvent, bool cooperative,
285284
std::vector<void *> *pKMemObj = nullptr, void *pNext = nullptr);
286285

287286
ur_result_t appendKernelLaunchUnlocked(

unified-runtime/source/adapters/level_zero/v2/kernel.cpp

Lines changed: 30 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -265,13 +265,36 @@ ur_result_t ur_kernel_handle_t_::setExecInfo(ur_kernel_exec_info_t propName,
265265
return UR_RESULT_SUCCESS;
266266
}
267267

268+
// Compute a zePtr pointer for the given memory handle and store it in *pZePtr
269+
ur_result_t ur_kernel_handle_t_::computeZePtr(
270+
ur_mem_handle_t hMem, ur_device_handle_t hDevice,
271+
ur_mem_buffer_t::device_access_mode_t accessMode,
272+
ze_command_list_handle_t zeCommandList, wait_list_view &waitListView,
273+
void **pZePtr) {
274+
UR_ASSERT(pZePtr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
275+
276+
void *zePtr = nullptr;
277+
if (hMem) {
278+
if (!hMem->isImage()) {
279+
auto hBuffer = hMem->getBuffer();
280+
zePtr = hBuffer->getDevicePtr(hDevice, accessMode, 0, hBuffer->getSize(),
281+
zeCommandList, waitListView);
282+
} else {
283+
auto hImage = static_cast<ur_mem_image_t *>(hMem->getImage());
284+
zePtr = reinterpret_cast<void *>(hImage->getZeImage());
285+
}
286+
}
287+
288+
*pZePtr = zePtr;
289+
return UR_RESULT_SUCCESS;
290+
}
291+
268292
// Perform any required allocations and set the kernel arguments.
269293
ur_result_t ur_kernel_handle_t_::prepareForSubmission(
270294
ur_context_handle_t hContext, ur_device_handle_t hDevice,
271295
const size_t *pGlobalWorkOffset, uint32_t workDim, uint32_t groupSizeX,
272296
uint32_t groupSizeY, uint32_t groupSizeZ,
273-
ze_command_list_handle_t commandList, wait_list_view &waitListView,
274-
std::vector<void *> *kMemObj) {
297+
ze_command_list_handle_t commandList, wait_list_view &waitListView) {
275298
auto &deviceKernelOpt = deviceKernels[deviceIndex(hDevice)];
276299
if (!deviceKernelOpt.has_value())
277300
return UR_RESULT_ERROR_INVALID_KERNEL;
@@ -288,34 +311,12 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission(
288311

289312
for (auto &pending : pending_allocations) {
290313
void *zePtr = nullptr;
291-
if (pending.hMem) {
292-
if (!pending.hMem->isImage()) {
293-
auto hBuffer = pending.hMem->getBuffer();
294-
zePtr =
295-
hBuffer->getDevicePtr(hDevice, pending.mode, 0, hBuffer->getSize(),
296-
commandList, waitListView);
297-
} else {
298-
auto hImage = static_cast<ur_mem_image_t *>(pending.hMem->getImage());
299-
zePtr = reinterpret_cast<void *>(hImage->getZeImage());
300-
}
301-
}
314+
// Compute a zePtr pointer for the given memory handle and store it in zePtr
315+
UR_CALL(computeZePtr(pending.hMem, hDevice, pending.mode, commandList,
316+
waitListView, &zePtr));
302317

303-
// kMemObj must be non-null in the path of
304-
// zeCommandListAppendLaunchKernelWithArguments()
305-
if (kMemObj) {
306-
// zeCommandListAppendLaunchKernelWithArguments()
307-
// (==CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithArguments())
308-
// calls setArgumentValue(i, argSize, argValue) for all arguments on its
309-
// own so do not call it here, but save the zePtr pointer in kMemObj
310-
// for this future call.
311-
if (pending.argIndex > kMemObj->size() - 1) {
312-
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
313-
}
314-
(*kMemObj)[pending.argIndex] = zePtr;
315-
} else {
316-
// Set the argument only on this device's kernel.
317-
UR_CALL(deviceKernel.setArgPointer(pending.argIndex, zePtr));
318-
}
318+
// Set the argument only on this device's kernel.
319+
UR_CALL(deviceKernel.setArgPointer(pending.argIndex, zePtr));
319320
}
320321
pending_allocations.clear();
321322

unified-runtime/source/adapters/level_zero/v2/kernel.hpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,12 @@ struct ur_kernel_handle_t_ : ur_object {
9494
ur_result_t addPendingPointerArgument(uint32_t argIndex,
9595
const void *pArgValue);
9696

97+
// Compute a zePtr pointer for the given memory handle and store it in *pZePtr
98+
ur_result_t computeZePtr(ur_mem_handle_t hMem, ur_device_handle_t hDevice,
99+
ur_mem_buffer_t::device_access_mode_t accessMode,
100+
ze_command_list_handle_t zeCommandList,
101+
wait_list_view &waitListView, void **pZePtr);
102+
97103
// Set all required values for the kernel before submission (including pending
98104
// memory allocations).
99105
// The kMemObj argument must be a non-empty vector
@@ -104,8 +110,7 @@ struct ur_kernel_handle_t_ : ur_object {
104110
uint32_t workDim, uint32_t groupSizeX,
105111
uint32_t groupSizeY, uint32_t groupSizeZ,
106112
ze_command_list_handle_t cmdList,
107-
wait_list_view &waitListView,
108-
std::vector<void *> *kMemObj = nullptr);
113+
wait_list_view &waitListView);
109114

110115
// Get context of the kernel.
111116
ur_context_handle_t getContext() const { return hProgram->Context; }

0 commit comments

Comments
 (0)