Open
Description
https://godbolt.org/z/on6Pj6Tsc
Starting from commit 5c0da58, InstCombine transforms this IR:
define linkonce_odr hidden noundef half @_Float16 __llvm_libc_20_0_0_git::fputil::abs<_Float16, 0>(_Float16)(half noundef %x) local_unnamed_addr #1 comdat {
entry:
%0 = bitcast half %x to i16
%1 = and i16 %0, 32767
%2 = bitcast i16 %1 to half
ret half %2
}
into:
define linkonce_odr hidden noundef half @_Float16 __llvm_libc_20_0_0_git::fputil::abs<_Float16, 0>(_Float16)(half noundef %x) local_unnamed_addr #1 comdat {
entry:
%0 = call half @llvm.fabs.f16(half %x)
ret half %0
}
On x86, when AVX-512 FP16 is not available, this generates something like:
.LCPI0_0:
.long 0x7fffffff
__llvm_libc_20_0_0_git::fabsf16(_Float16):
push rbp
mov rbp, rsp
call __extendhfsf2@PLT
vbroadcastss xmm1, dword ptr [rip + .LCPI0_0]
vandps xmm0, xmm0, xmm1
call __truncsfhf2@PLT
pop rbp
ret
whereas with LLVM 16, the `bitcast` -> `and` -> `bitcast` sequence generates something like:
__llvm_libc_20_0_0_git::fabsf16(_Float16):
push rbp
mov rbp, rsp
vpextrw eax, xmm0, 0
and eax, 32767
vpinsrw xmm0, xmm0, eax, 0
pop rbp
ret
Related: