@@ -154,9 +154,9 @@ ur_command_list_manager::getSignalEvent(ur_event_handle_t hUserEvent,
154154ur_result_t ur_command_list_manager::appendKernelLaunchLocked (
155155 ur_kernel_handle_t hKernel, ze_kernel_handle_t hZeKernel, uint32_t workDim,
156156 const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
157- const size_t *pLocalWorkSize, uint32_t numEventsInWaitList ,
158- const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent ,
159- bool cooperative, std::vector< void *> *pKMemObj, void *pNext) {
157+ const size_t *pLocalWorkSize, wait_list_view &waitListView ,
158+ ur_event_handle_t phEvent, bool cooperative, std::vector< void *> *pKMemObj ,
159+ void *pNext) {
160160
161161 ze_group_count_t zeThreadGroupDimensions{1 , 1 , 1 };
162162 uint32_t WG[3 ]{};
@@ -165,11 +165,10 @@ ur_result_t ur_command_list_manager::appendKernelLaunchLocked(
165165 pGlobalWorkSize, pLocalWorkSize));
166166
167167 auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_KERNEL_LAUNCH);
168- auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
169168
170169 UR_CALL (hKernel->prepareForSubmission (
171170 hContext.get (), hDevice.get (), pGlobalWorkOffset, workDim, WG[0 ], WG[1 ],
172- WG[2 ], getZeCommandList (), waitListView, pKMemObj ));
171+ WG[2 ], getZeCommandList (), waitListView));
173172
174173 if (pKMemObj) {
175174 // zeCommandListAppendLaunchKernelWithArguments
@@ -231,11 +230,13 @@ ur_result_t ur_command_list_manager::appendKernelLaunchUnlocked(
231230
232231 std::scoped_lock<ur_shared_mutex> Lock (hKernel->Mutex );
233232
233+ wait_list_view waitListView =
234+ getWaitListView (phEventWaitList, numEventsInWaitList);
235+
234236 // last arguments: pKMemObj == nullptr and pNext == nullptr
235- return appendKernelLaunchLocked (hKernel, hZeKernel, workDim,
236- pGlobalWorkOffset, pGlobalWorkSize,
237- pLocalWorkSize, numEventsInWaitList,
238- phEventWaitList, phEvent, cooperative);
237+ return appendKernelLaunchLocked (
238+ hKernel, hZeKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
239+ pLocalWorkSize, waitListView, phEvent, cooperative);
239240}
240241
241242ur_result_t ur_command_list_manager::appendKernelLaunch (
@@ -1164,6 +1165,9 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11641165 hKernel->kernelMemObj .resize (numArgs, 0 );
11651166 hKernel->kernelArgs .resize (numArgs, 0 );
11661167
1168+ wait_list_view waitListView =
1169+ getWaitListView (phEventWaitList, numEventsInWaitList);
1170+
11671171 for (uint32_t argIndex = 0 ; argIndex < numArgs; argIndex++) {
11681172 switch (pArgs[argIndex].type ) {
11691173 case UR_EXP_KERNEL_ARG_TYPE_LOCAL:
@@ -1176,12 +1180,13 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11761180 hKernel->kernelArgs [argIndex] = (void *)&pArgs[argIndex].value .pointer ;
11771181 break ;
11781182 case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ:
1179- // prepareForSubmission() will save zePtr in kernelMemObj[argIndex]
1183+ // compute zePtr for the given memory handle and store it in
1184+ // hKernel->kernelMemObj[argIndex]
1185+ UR_CALL (hKernel->computeZePtr (
1186+ pArgs[argIndex].value .memObjTuple .hMem , hDevice.get (),
1187+ ur_mem_buffer_t ::device_access_mode_t ::read_write, getZeCommandList (),
1188+ waitListView, &hKernel->kernelMemObj [argIndex]));
11801189 hKernel->kernelArgs [argIndex] = &hKernel->kernelMemObj [argIndex];
1181- UR_CALL (hKernel->addPendingMemoryAllocation (
1182- {pArgs[argIndex].value .memObjTuple .hMem ,
1183- ur_mem_buffer_t ::device_access_mode_t ::read_write,
1184- pArgs[argIndex].index }));
11851190 break ;
11861191 case UR_EXP_KERNEL_ARG_TYPE_SAMPLER:
11871192 hKernel->kernelArgs [argIndex] = &pArgs[argIndex].value .sampler ->ZeSampler ;
@@ -1193,8 +1198,8 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11931198
11941199 return appendKernelLaunchLocked (
11951200 hKernel, hZeKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
1196- pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent ,
1197- cooperativeKernelLaunchRequested, &hKernel->kernelMemObj , pNext);
1201+ pLocalWorkSize, waitListView, phEvent, cooperativeKernelLaunchRequested ,
1202+ &hKernel->kernelMemObj , pNext);
11981203}
11991204
12001205ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExp (
0 commit comments