@@ -143,48 +143,48 @@ constexpr auto avx2_compressstore_lut64_perm
// Compile-time constant bound to the `.second` member of
// avx2_compressstore_lut64_gen.
// NOTE(review): the name suggests this is the "left" half of the 64-bit
// compress-store lookup data; the generator is not visible here -- confirm.
143143constexpr auto avx2_compressstore_lut64_left
144144 = avx2_compressstore_lut64_gen.second;
145145
// Turns the integer mask `m` into a 256-bit vector mask by using `m` as an
// index into avx2_mask_helper_lut32 and doing an unaligned 256-bit load of
// that entry's storage.
// `m` is used as the table index without any range check in this function.
// NOTE(review): presumably each table entry is the per-32-bit-lane expansion
// of the bits of `m`; the table definition is not visible here -- confirm.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
146- X86_SIMD_SORT_INLINE
146+ X86_SIMD_SORT_FORCE_INLINE
147147__m256i convert_int_to_avx2_mask (int32_t m)
148148{
149149 return _mm256_loadu_si256 (
150150 (const __m256i *)avx2_mask_helper_lut32[m].data ());
151151}
152152
// Packs a 256-bit vector mask into an integer: `m` is reinterpreted as
// eight 32-bit float lanes (a bit-level cast, no value conversion) and
// _mm256_movemask_ps gathers the most significant bit of each lane into the
// low 8 bits of the result.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
153- X86_SIMD_SORT_INLINE
153+ X86_SIMD_SORT_FORCE_INLINE
154154int32_t convert_avx2_mask_to_int (__m256i m)
155155{
156156 return _mm256_movemask_ps (_mm256_castsi256_ps (m));
157157}
158158
// Turns the integer mask `m` into a 256-bit vector mask by using `m` as an
// index into avx2_mask_helper_lut64 and doing an unaligned 256-bit load of
// that entry's storage.
// `m` is used as the table index without any range check in this function.
// NOTE(review): presumably each table entry is the per-64-bit-lane expansion
// of the bits of `m`; the table definition is not visible here -- confirm.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
159- X86_SIMD_SORT_INLINE
159+ X86_SIMD_SORT_FORCE_INLINE
160160__m256i convert_int_to_avx2_mask_64bit (int32_t m)
161161{
162162 return _mm256_loadu_si256 (
163163 (const __m256i *)avx2_mask_helper_lut64[m].data ());
164164}
165165
// Packs a 256-bit vector mask into an integer: `m` is reinterpreted as four
// 64-bit double lanes (a bit-level cast, no value conversion) and
// _mm256_movemask_pd gathers the most significant bit of each lane into the
// low 4 bits of the result.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
166- X86_SIMD_SORT_INLINE
166+ X86_SIMD_SORT_FORCE_INLINE
167167int32_t convert_avx2_mask_to_int_64bit (__m256i m)
168168{
169169 return _mm256_movemask_pd (_mm256_castsi256_pd (m));
170170}
171171
// Half-width (128-bit) variant: uses `m` as an index into
// avx2_mask_helper_lut32_half and does an unaligned 128-bit load of that
// entry's storage to produce the vector mask.
// `m` is used as the table index without any range check in this function.
// NOTE(review): presumably each table entry is the per-32-bit-lane expansion
// of the bits of `m`; the table definition is not visible here -- confirm.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
172- X86_SIMD_SORT_INLINE
172+ X86_SIMD_SORT_FORCE_INLINE
173173__m128i convert_int_to_avx2_mask_half (int32_t m)
174174{
175175 return _mm_loadu_si128 (
176176 (const __m128i *)avx2_mask_helper_lut32_half[m].data ());
177177}
178178
// Half-width (128-bit) variant: `m` is reinterpreted as four 32-bit float
// lanes (a bit-level cast, no value conversion) and _mm_movemask_ps gathers
// the most significant bit of each lane into the low 4 bits of the result.
// The patch shown replaces X86_SIMD_SORT_INLINE with
// X86_SIMD_SORT_FORCE_INLINE on this function.
179- X86_SIMD_SORT_INLINE
179+ X86_SIMD_SORT_FORCE_INLINE
180180int32_t convert_avx2_mask_to_int_half (__m128i m)
181181{
182182 return _mm_movemask_ps (_mm_castsi128_ps (m));
183183}
184184
185185// Emulators for intrinsics missing from AVX2 compared to AVX512
186186template <typename T>
187- T avx2_emu_reduce_max32 (typename avx2_vector<T>::reg_t x)
187+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_max32 (typename avx2_vector<T>::reg_t x)
188188{
189189 using vtype = avx2_vector<T>;
190190 using reg_t = typename vtype::reg_t ;
@@ -199,7 +199,7 @@ T avx2_emu_reduce_max32(typename avx2_vector<T>::reg_t x)
199199}
200200
201201template <typename T>
202- T avx2_emu_reduce_max32_half (typename avx2_half_vector<T>::reg_t x)
202+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_max32_half (typename avx2_half_vector<T>::reg_t x)
203203{
204204 using vtype = avx2_half_vector<T>;
205205 using reg_t = typename vtype::reg_t ;
@@ -212,7 +212,7 @@ T avx2_emu_reduce_max32_half(typename avx2_half_vector<T>::reg_t x)
212212}
213213
214214template <typename T>
215- T avx2_emu_reduce_min32 (typename avx2_vector<T>::reg_t x)
215+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_min32 (typename avx2_vector<T>::reg_t x)
216216{
217217 using vtype = avx2_vector<T>;
218218 using reg_t = typename vtype::reg_t ;
@@ -227,7 +227,7 @@ T avx2_emu_reduce_min32(typename avx2_vector<T>::reg_t x)
227227}
228228
229229template <typename T>
230- T avx2_emu_reduce_min32_half (typename avx2_half_vector<T>::reg_t x)
230+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_min32_half (typename avx2_half_vector<T>::reg_t x)
231231{
232232 using vtype = avx2_half_vector<T>;
233233 using reg_t = typename vtype::reg_t ;
@@ -240,7 +240,7 @@ T avx2_emu_reduce_min32_half(typename avx2_half_vector<T>::reg_t x)
240240}
241241
242242template <typename T>
243- T avx2_emu_reduce_max64 (typename avx2_vector<T>::reg_t x)
243+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_max64 (typename avx2_vector<T>::reg_t x)
244244{
245245 using vtype = avx2_vector<T>;
246246 typename vtype::reg_t inter1 = vtype::max (
@@ -251,7 +251,7 @@ T avx2_emu_reduce_max64(typename avx2_vector<T>::reg_t x)
251251}
252252
253253template <typename T>
254- T avx2_emu_reduce_min64 (typename avx2_vector<T>::reg_t x)
254+ X86_SIMD_SORT_FORCE_INLINE T avx2_emu_reduce_min64 (typename avx2_vector<T>::reg_t x)
255255{
256256 using vtype = avx2_vector<T>;
257257 typename vtype::reg_t inter1 = vtype::min (
@@ -262,7 +262,7 @@ T avx2_emu_reduce_min64(typename avx2_vector<T>::reg_t x)
262262}
263263
264264template <typename T>
265- void avx2_emu_mask_compressstoreu32 (void *base_addr,
265+ X86_SIMD_SORT_FORCE_INLINE void avx2_emu_mask_compressstoreu32 (void *base_addr,
266266 typename avx2_vector<T>::opmask_t k,
267267 typename avx2_vector<T>::reg_t reg)
268268{
@@ -282,7 +282,7 @@ void avx2_emu_mask_compressstoreu32(void *base_addr,
282282}
283283
284284template <typename T>
285- void avx2_emu_mask_compressstoreu32_half (
285+ X86_SIMD_SORT_FORCE_INLINE void avx2_emu_mask_compressstoreu32_half (
286286 void *base_addr,
287287 typename avx2_half_vector<T>::opmask_t k,
288288 typename avx2_half_vector<T>::reg_t reg)
@@ -305,7 +305,7 @@ void avx2_emu_mask_compressstoreu32_half(
305305}
306306
307307template <typename T>
308- void avx2_emu_mask_compressstoreu64 (void *base_addr,
308+ X86_SIMD_SORT_FORCE_INLINE void avx2_emu_mask_compressstoreu64 (void *base_addr,
309309 typename avx2_vector<T>::opmask_t k,
310310 typename avx2_vector<T>::reg_t reg)
311311{
@@ -326,7 +326,7 @@ void avx2_emu_mask_compressstoreu64(void *base_addr,
326326}
327327
328328template <typename T>
329- int avx2_double_compressstore32 (void *left_addr,
329+ X86_SIMD_SORT_FORCE_INLINE int avx2_double_compressstore32 (void *left_addr,
330330 void *right_addr,
331331 typename avx2_vector<T>::opmask_t k,
332332 typename avx2_vector<T>::reg_t reg)
@@ -349,7 +349,7 @@ int avx2_double_compressstore32(void *left_addr,
349349}
350350
351351template <typename T>
352- int avx2_double_compressstore32_half (void *left_addr,
352+ X86_SIMD_SORT_FORCE_INLINE int avx2_double_compressstore32_half (void *left_addr,
353353 void *right_addr,
354354 typename avx2_half_vector<T>::opmask_t k,
355355 typename avx2_half_vector<T>::reg_t reg)
@@ -373,7 +373,7 @@ int avx2_double_compressstore32_half(void *left_addr,
373373}
374374
375375template <typename T>
376- int32_t avx2_double_compressstore64 (void *left_addr,
376+ X86_SIMD_SORT_FORCE_INLINE int32_t avx2_double_compressstore64 (void *left_addr,
377377 void *right_addr,
378378 typename avx2_vector<T>::opmask_t k,
379379 typename avx2_vector<T>::reg_t reg)
@@ -397,7 +397,7 @@ int32_t avx2_double_compressstore64(void *left_addr,
397397}
398398
399399template <typename T>
400- typename avx2_vector<T>::reg_t avx2_emu_max (typename avx2_vector<T>::reg_t x,
400+ X86_SIMD_SORT_FORCE_INLINE typename avx2_vector<T>::reg_t avx2_emu_max (typename avx2_vector<T>::reg_t x,
401401 typename avx2_vector<T>::reg_t y)
402402{
403403 using vtype = avx2_vector<T>;
@@ -408,7 +408,7 @@ typename avx2_vector<T>::reg_t avx2_emu_max(typename avx2_vector<T>::reg_t x,
408408}
409409
410410template <typename T>
411- typename avx2_vector<T>::reg_t avx2_emu_min (typename avx2_vector<T>::reg_t x,
411+ X86_SIMD_SORT_FORCE_INLINE typename avx2_vector<T>::reg_t avx2_emu_min (typename avx2_vector<T>::reg_t x,
412412 typename avx2_vector<T>::reg_t y)
413413{
414414 using vtype = avx2_vector<T>;
0 commit comments