2 files changed
+3
-3
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
26 | 26 | | |
27 | 27 | | |
28 | 28 | | |
29 | | - | |
30 | | - | |
| 29 | + | |
| 30 | + | |
31 | 31 | | |
32 | 32 | | |
33 | 33 | | |
| |||
- CMakeLists.txt+2-1
- README.md+19-19
- benchmarks/python/blas/bench_gemm.py+3-5
- benchmarks/python/blas/bench_gemv.py+2-2
- mlx/backend/common/matmul.h+2-2
- mlx/backend/common/ternary.h+12
- mlx/backend/cpu/gemms/cblas.cpp+43
- mlx/backend/cpu/matmul.cpp+3-4
- mlx/backend/cpu/simd/accelerate_simd.h+1-1
- mlx/backend/cpu/sort.cpp+32-2
- mlx/backend/cpu/unary_ops.h+2-1
- mlx/backend/cuda/CMakeLists.txt+4
- mlx/backend/cuda/allocator.cpp+12-7
- mlx/backend/cuda/compiled.cpp+2-2
- mlx/backend/cuda/conv.cpp+10-11
- mlx/backend/cuda/cudnn_utils.cpp+3
- mlx/backend/cuda/device.cpp+13-14
- mlx/backend/cuda/device.h+2-2
- mlx/backend/cuda/device/unary_ops.cuh+2-2
- mlx/backend/cuda/device/utils.cuh+2-2
- mlx/backend/cuda/eval.cpp+18-5
- mlx/backend/cuda/event.cu+53-14
- mlx/backend/cuda/event.h+8-2
- mlx/backend/cuda/gemms/cublas_gemm.cpp+19-6
- mlx/backend/cuda/gemms/cublas_gemm.h+1
- mlx/backend/cuda/gemms/gemv.cu+36-5
- mlx/backend/cuda/jit_module.cpp+55-12
- mlx/backend/cuda/jit_module.h+4-1
- mlx/backend/cuda/kernel_utils.cu+3-5
- mlx/backend/cuda/kernel_utils.cuh+18-9
- mlx/backend/cuda/matmul.cpp+6-1
- mlx/backend/cuda/reduce/col_reduce.cu+93-2
- mlx/backend/cuda/reduce/row_reduce.cu+86-93
- mlx/backend/cuda/ternary.cu+19-18
- mlx/backend/cuda/unary.cu-284
- mlx/backend/cuda/utils.h+15-1
- mlx/backend/cuda/worker.cpp+3-3
- mlx/backend/cuda/worker.h+1-1
- mlx/backend/metal/device.cpp+4
- mlx/backend/metal/indexing.cpp+2-2
- mlx/backend/metal/jit_kernels.cpp+21-6
- mlx/backend/metal/kernels/complex.h+21
- mlx/backend/metal/kernels/gemv.metal+26-9
- mlx/backend/metal/kernels/sort.h+20-3
- mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.metal+1-2
- mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.metal+1-2
- mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.metal+3-1
- mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.metal+1-1
- mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.metal+3-1
- mlx/backend/metal/kernels/steel/gemm/mma.h+411
- mlx/backend/metal/kernels/steel/gemm/transforms.h+2-1
- mlx/backend/metal/kernels/ternary.h+24-6
- mlx/backend/metal/kernels/ternary.metal+9-3
- mlx/backend/metal/kernels/unary_ops.h+2-2
- mlx/backend/metal/matmul.cpp+16-6
- mlx/backend/metal/ternary.cpp+12-5
- mlx/compile.cpp+131-25
- mlx/compile_impl.h+13-2
- mlx/distributed/nccl/nccl.cpp+24-12
- mlx/distributed/primitives.cpp+15-6
- mlx/dtype_utils.h+13
- mlx/export.cpp+201-22
- mlx/export.h+66
- mlx/export_impl.h+27
- mlx/io/load.h+1
- mlx/ops.cpp+57-35
- mlx/ops.h+35
- mlx/utils.h+5
- mlx/version.h+1-1
- python/mlx/nn/losses.py+3-1
- python/mlx/optimizers/optimizers.py+2-6
- python/src/array.cpp+5-19
- python/src/export.cpp+24-6
- python/src/transforms.cpp+23-19
- python/tests/ring_test_distributed.py+10
- python/tests/test_array.py+1
- python/tests/test_autograd.py+1
- python/tests/test_blas.py+9
- python/tests/test_compile.py+84-1
- python/tests/test_export_import.py+46
- python/tests/test_losses.py+12-2
- python/tests/test_ops.py+12-2
0 commit comments