@@ -694,6 +694,108 @@ <h2>Intel® GPU Occupancy Calculator</h2>
694694 "Max_Num_Of_Barrier_Registers" : 32
695695 }
696696} ,
697+ // RPL-P (Raptor Lake-P)
698+ { // Device entry matched by PCI IDs A7A1 / A7A0 / A721 / A720 (96 EUs, 16 EUs per sub-slice).
699+ "pci_id" : [ "A7A1" , "A7A0" , "A721" , "A720" ] ,
700+ "name" : "Integrated GPU (Xe LP)" ,
701+ "product_name" : "Intel® Iris® Xe Max Graphics" , // NOTE(review): "Iris Xe Max" looks like the discrete-GPU brand while "name" says Integrated GPU — confirm the intended product name.
702+ "code" : "gen12" ,
703+ "device_info" : {
704+ "EU_Per_Sub_Slice" : 16 ,
705+ "Threads_Per_EU" : 7 ,
706+ "EU_Count" : 96 ,
707+ "Max_Threads_Per_Sub_Slice" : 112 , // equals EU_Per_Sub_Slice x Threads_Per_EU (16 x 7)
708+ "Large_GRF_Mode" : false ,
709+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
710+ "SLM_Size_Per_Sub_Slice" : 64 ,
711+ "SLM_Size_Per_Work_Group" : 64 ,
712+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] , // largest option matches SLM_Size_Per_Work_Group (64)
713+ "Max_Work_Group_Size" : 512 ,
714+ "Max_Num_Of_Workgroups" : 112 ,
715+ "Max_Num_Of_Barrier_Registers" : 32
716+ }
717+ } ,
718+ { // Device entry matched by PCI IDs A7A9 / A7A8 (64 EUs, 16 EUs per sub-slice).
719+ "pci_id" : [ "A7A9" , "A7A8" ] ,
720+ "name" : "Integrated GPU (Xe LP)" ,
721+ "product_name" : "Intel® Iris® Xe Max Graphics" , // NOTE(review): "Iris Xe Max" looks like the discrete-GPU brand while "name" says Integrated GPU — confirm the intended product name.
722+ "code" : "gen12" ,
723+ "device_info" : {
724+ "EU_Per_Sub_Slice" : 16 ,
725+ "Threads_Per_EU" : 7 ,
726+ "EU_Count" : 64 ,
727+ "Max_Threads_Per_Sub_Slice" : 112 , // equals EU_Per_Sub_Slice x Threads_Per_EU (16 x 7)
728+ "Large_GRF_Mode" : false ,
729+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
730+ "SLM_Size_Per_Sub_Slice" : 64 ,
731+ "SLM_Size_Per_Work_Group" : 64 ,
732+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] , // largest option matches SLM_Size_Per_Work_Group (64)
733+ "Max_Work_Group_Size" : 512 ,
734+ "Max_Num_Of_Workgroups" : 112 ,
735+ "Max_Num_Of_Barrier_Registers" : 32
736+ }
737+ } ,
738+ // RPL-S (Raptor Lake-S)
739+ { // Device entry matched by PCI IDs A780 / A781 / A788 / A789 (32 EUs, 16 EUs per sub-slice).
740+ "pci_id" : [ "A780" , "A781" , "A788" , "A789" ] ,
741+ "name" : "Integrated GPU (Xe LP)" ,
742+ "product_name" : "Intel® UHD Graphics" ,
743+ "code" : "gen12" ,
744+ "device_info" : {
745+ "EU_Per_Sub_Slice" : 16 ,
746+ "Threads_Per_EU" : 7 ,
747+ "EU_Count" : 32 ,
748+ "Max_Threads_Per_Sub_Slice" : 112 , // equals EU_Per_Sub_Slice x Threads_Per_EU (16 x 7)
749+ "Large_GRF_Mode" : false ,
750+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
751+ "SLM_Size_Per_Sub_Slice" : 64 ,
752+ "SLM_Size_Per_Work_Group" : 64 ,
753+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] , // largest option matches SLM_Size_Per_Work_Group (64)
754+ "Max_Work_Group_Size" : 512 ,
755+ "Max_Num_Of_Workgroups" : 112 ,
756+ "Max_Num_Of_Barrier_Registers" : 32
757+ }
758+ } ,
759+ { // Device entry matched by PCI IDs A782 / A78A (24 EUs).
760+ "pci_id" : [ "A782" , "A78A" ] ,
761+ "name" : "Integrated GPU (Xe LP)" ,
762+ "product_name" : "Intel® UHD Graphics" ,
763+ "code" : "gen12" ,
764+ "device_info" : {
765+ "EU_Per_Sub_Slice" : 12 , // NOTE(review): 12 x Threads_Per_EU (7) = 84, but Max_Threads_Per_Sub_Slice below is 112 (= 16 x 7) — confirm which of the two values is intended.
766+ "Threads_Per_EU" : 7 ,
767+ "EU_Count" : 24 ,
768+ "Max_Threads_Per_Sub_Slice" : 112 , // NOTE(review): does not equal EU_Per_Sub_Slice x Threads_Per_EU for this entry (12 x 7 = 84) — verify.
769+ "Large_GRF_Mode" : false ,
770+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
771+ "SLM_Size_Per_Sub_Slice" : 64 ,
772+ "SLM_Size_Per_Work_Group" : 64 ,
773+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] , // largest option matches SLM_Size_Per_Work_Group (64)
774+ "Max_Work_Group_Size" : 512 ,
775+ "Max_Num_Of_Workgroups" : 112 ,
776+ "Max_Num_Of_Barrier_Registers" : 32
777+ }
778+ } ,
779+ { // Device entry matched by PCI IDs A783 / A78B (16 EUs, 16 EUs per sub-slice).
780+ "pci_id" : [ "A783" , "A78B" ] ,
781+ "name" : "Integrated GPU (Xe LP)" ,
782+ "product_name" : "Intel® UHD Graphics" ,
783+ "code" : "gen12" ,
784+ "device_info" : {
785+ "EU_Per_Sub_Slice" : 16 ,
786+ "Threads_Per_EU" : 7 ,
787+ "EU_Count" : 16 , // same as EU_Per_Sub_Slice for this entry
788+ "Max_Threads_Per_Sub_Slice" : 112 , // equals EU_Per_Sub_Slice x Threads_Per_EU (16 x 7)
789+ "Large_GRF_Mode" : false ,
790+ "Subgroup_Sizes" : [ 32 , 16 , 8 ] ,
791+ "SLM_Size_Per_Sub_Slice" : 64 ,
792+ "SLM_Size_Per_Work_Group" : 64 ,
793+ "TG_SLM_Sizes" : [ 0 , 1 , 2 , 4 , 8 , 16 , 32 , 64 ] , // largest option matches SLM_Size_Per_Work_Group (64)
794+ "Max_Work_Group_Size" : 512 ,
795+ "Max_Num_Of_Workgroups" : 112 ,
796+ "Max_Num_Of_Barrier_Registers" : 32
797+ }
798+ } ,
697799//ARC
698800{
699801 "pci_id" : [ "56A5" , "5694" ] ,
@@ -1256,23 +1358,21 @@ <h2>Intel® GPU Occupancy Calculator</h2>
12561358}
12571359
12581360// Calculate GPU Occupancy
1361+
// Compute the average occupancy of the whole GPU for a kernel launch.
//
// Parameters:
//   wg           - work-group size (work-items per work-group)
//   num_wg       - number of work-groups resident on one sub-slice at a time
//   ss_occ       - occupancy of a single fully loaded sub-slice (returned unchanged)
//   num_ss       - number of sub-slices on the GPU
//   global_range - total number of work-items in the launch
//
// The launch is treated as a sequence of passes, each holding up to
// num_ss * num_wg * wg work-items. Every full pass contributes ss_occ; a final
// partial pass contributes ss_occ scaled by the fraction of capacity it uses.
// The result is the mean over all passes.
//
// Returns { gpu_occ, ss_occ }. gpu_occ is 0 when there is no capacity or no
// work to run (instead of NaN from a 0 / 0 division).
function compute_gpu_occupancy(wg, num_wg, ss_occ, num_ss, global_range) {
  // Max number of work-items resident across all sub-slices in one pass.
  var num_wi = num_ss * num_wg * wg;

  // Nothing can run, or nothing was requested: report zero occupancy rather
  // than dividing by zero below.
  if (!(num_wi > 0) || !(global_range > 0)) {
    return { gpu_occ: 0, ss_occ };
  }

  // Number of completely filled passes. Math.floor is used instead of
  // parseInt because parseInt stringifies its argument first, so a tiny ratio
  // such as 6.8e-8 would be parsed as 6 instead of 0.
  var num_pass = Math.floor(global_range / num_wi);
  var gpu_pass = ss_occ * num_pass;

  // Work-items left over for a final, partially filled pass.
  var num_wi_left = global_range % num_wi;
  if (num_wi_left !== 0) {
    gpu_pass += (num_wi_left / num_wi) * ss_occ;
    num_pass += 1;
  }

  // Average the per-pass occupancy over every pass that was needed.
  var gpu_occ = gpu_pass / num_pass;
  return { gpu_occ, ss_occ };
}
12771377
12781378// Generate Graphs and Optimal Occupancy Config table
0 commit comments