diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index 219491a3b00ca..ea1f509eee8bf 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -1107,10 +1107,10 @@ is shown in a single column in the table below. ==== Intel XMX Supported Combinations This is currently available in devices with the architecture `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_dg2_g10`, -`architecture::intel_gpu_dg2_g11`, `architecture::intel_gpu_dg2_g12`, -`architecture::intel_gpu_arl_h`, `architecture::intel_gpu_ptl_h`, and -`architecture::intel_gpu_ptl_u`. +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_dg2_g10`, `architecture::intel_gpu_dg2_g11`, +`architecture::intel_gpu_dg2_g12`, `architecture::intel_gpu_arl_h`, +`architecture::intel_gpu_ptl_h`, and `architecture::intel_gpu_ptl_u`. [frame="none",options="header"] |====================== @@ -1118,40 +1118,40 @@ This is currently available in devices with the architecture .2+| `matrix_type::uint8` .2+| `matrix_type::uint8` .2+| `matrix_type::sint32` .2+| `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 |`architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` |8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, `architecture::intel_gpu_arl_h` .2+| `matrix_type::uint8` .2+| `matrix_type::sint8` .2+| `matrix_type::sint32` .2+|`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` |8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, `architecture::intel_gpu_arl_h` .2+| `matrix_type::sint8` .2+| `matrix_type::uint8` .2+| `matrix_type::sint32` .2+|`matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` |8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, `architecture::intel_gpu_arl_h` .2+| `matrix_type::sint8` .2+| `matrix_type::sint8` .2+| `matrix_type::sint32` .2+| `matrix_type::sint32` .2+| +<=+ 8 | 16 .2+| 32 | `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` |8|`architecture::intel_gpu_dg2_g10, architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, `architecture::intel_gpu_arl_h` .8+|`matrix_type::fp16` .8+| `matrix_type::fp16` .8+| `matrix_type::fp32` .8+|`matrix_type::fp32` .1+| 16 .1+| 16 | 16 .6+|`architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` .2+| 1 .2+| 64 | 16 |32 .2+| 32 .2+| 64 | 16 |32 .2+| +<=+ 8 | 16 .2+| 16 @@ -1162,28 +1162,28 @@ architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, .6+|`matrix_type::fp16` .6+| `matrix_type::fp16` .6+| `matrix_type::fp16` .6+|`matrix_type::fp32` .1+| +<=+ 8 | 16 .1+| 16 .6+| `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 | 16 | 32 .6+|`matrix_type::fp16` .6+| `matrix_type::fp16` .6+| `matrix_type::fp32` .6+|`matrix_type::fp16` .1+| +<=+ 8 | 16 .1+| 16 .6+|`architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 |16 | 32 .6+|`matrix_type::fp16` .6+| `matrix_type::fp16` .6+| `matrix_type::fp16` .6+|`matrix_type::fp16` .1+| +<=+ 8 | 16 .1+| 16 .6+|`architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 |32 .2+| 32 .2+| 64 | 16 | 32 .8+| `matrix_type::bf16` .8+| `matrix_type::bf16` .8+| `matrix_type::fp32` .8+| `matrix_type::fp32` | 16 | 16 | 16 .6+|`architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 | 16 |32 .2+| +<=+ 8 | 16 .2+| 16 @@ -1194,34 +1194,35 @@ architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`, .6+|`matrix_type::bf16` .6+| `matrix_type::bf16` .6+| `matrix_type::bf16` .6+|`matrix_type::fp32` .1+| +<=+ 8 | 16 .1+| 16 .6+| `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 |16 | 32 .6+|`matrix_type::bf16` .6+| `matrix_type::bf16` .6+| `matrix_type::fp32` .6+|`matrix_type::bf16` .1+| +<=+ 8 | 16 .1+| 16 .6+| `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 |16 | 32 .6+|`matrix_type::bf16` .6+| `matrix_type::bf16` .6+| `matrix_type::bf16` .6+|`matrix_type::bf16` .1+| +<=+ 8 | 16 .1+| 16 .6+| `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` | 16 | 16 | 16 .2+| 1 .2+| 64 | 16 | 32 .2+| 32 .2+| 64 |16 | 32 | `matrix_type::tf32` | `matrix_type::tf32` | `matrix_type::fp32` .2+| `matrix_type::fp32` | +<=+ 8 | 16 | 8 | `architecture::intel_gpu_pvc`, `architecture::intel_gpu_bmg_g21`, -`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, -`architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g31`, `architecture::intel_gpu_lnl_m`, +`architecture::intel_gpu_ptl_h`, `architecture::intel_gpu_ptl_u` |====================== ===== Restrictions on `architecture::intel_gpu_pvc`, -`architecture::intel_gpu_bmg_g21`, `architecture::intel_gpu_lnl_m`, -`architecture::intel_gpu_ptl_h`, and `architecture::intel_gpu_ptl_u` +`architecture::intel_gpu_bmg_g21`, `architecture::intel_gpu_bmg_g31`, +`architecture::intel_gpu_lnl_m`, `architecture::intel_gpu_ptl_h`, +and `architecture::intel_gpu_ptl_u` - The `stride` parameter to `joint_matrix_load` and `joint_matrix_store` has the following restrictions: @@ -1363,4 +1364,4 @@ load/store overloads |11 |2024-04-29 |Yury Plyakhin | Add 1x64x16 supported combination for Intel XMX (intel_gpu_pvc) |12 |2024-06-14 |Jack Kirk | Add note on sm version device matching issue. -|====================== \ No newline at end of file +|======================