mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-19 11:35:51 -04:00

This adds extra scalar handling to isFMAFasterThanFMulAndFAdd, allowing the target-independent code to handle more folds in more situations (for example, if the fast-math flags are present but the global AllowFPOpFusion option isn't). It also splits HasSlowFPVFMx out of HasSlowFPVMLx, so that VFMA and VMLA can be controlled separately if needed. Differential Revision: https://reviews.llvm.org/D72139
430 lines
13 KiB
LLVM
430 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
|
|
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
|
|
|
|
; Check generated fp16 fused MAC and MLS.
|
|
|
|
; fusedMACTest2: computes (*%a1 * *%a2) + *%a3 in half precision and stores
; the result back through %a1.
; The first run line expects the fmul/fadd pair to be contracted into a single
; vfma.f16; the +slowfpvfmx run line expects a separate vmul.f16 and vadd.f16.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Plain multiply followed by add: the contraction candidate.
  %prod = fmul half %x, %y
  %sum = fadd half %prod, %acc
  store half %sum, half* %a1, align 2
  ret void
}
|
|
|
|
; fusedMACTest4: computes *%a1 - (*%a2 * *%a3) in half precision and stores
; the result back through %a1.
; The first run line expects a single vfms.f16; the +slowfpvfmx run line
; expects a separate vmul.f16 and vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r2]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r0]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r2]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
  %acc = load half, half* %a1, align 2
  %x = load half, half* %a2, align 2
  %y = load half, half* %a3, align 2
  ; The product is the subtrahend: acc - x*y.
  %prod = fmul half %x, %y
  %diff = fsub half %acc, %prod
  store half %diff, half* %a1, align 2
  ret void
}
|
|
|
|
; fusedMACTest6: computes -(*%a1 * *%a2) - *%a3 in half precision and stores
; the result back through %a1.
; The first run line expects a single vfnma.f16; the +slowfpvfmx run line
; expects vnmul.f16 followed by vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  %prod = fmul half %x, %y
  ; Negation is spelled as a subtraction from -0.0.
  %negprod = fsub half -0.0, %prod
  %res = fsub half %negprod, %acc
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fusedMACTest8: computes (*%a1 * *%a2) - *%a3 in half precision and stores
; the result back through %a1.
; The first run line expects a single vfnms.f16; the +slowfpvfmx run line
; expects a separate vmul.f16 and vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; The product is the minuend: x*y - acc.
  %prod = fmul half %x, %y
  %diff = fsub half %prod, %acc
  store half %diff, half* %a1, align 2
  ret void
}
|
|
|
|
; test_fma_f16: calls llvm.fma.f16(*%aa, *%bb, *%cc) and stores the result back
; through %aa. Both run lines expect vfma.f16, i.e. the explicit intrinsic is
; expected to be selected as a fused instruction even with +slowfpvfmx.
; NOTE(review): the definition carries readnone although the body stores
; through %aa; the attribute is kept exactly as found.
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE: @ %bb.0: @ %entry
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
; The block label must stay "entry": the assertions above mention %entry.
entry:
  %x = load half, half* %aa, align 2
  %y = load half, half* %bb, align 2
  %acc = load half, half* %cc, align 2
  %fused = tail call half @llvm.fma.f16(half %x, half %y, half %acc) nounwind readnone
  store half %fused, half* %aa, align 2
  ret void
}
|
|
|
|
; test_fnms_f16: calls llvm.fma.f16(*%aa, *%bb, -(*%cc)) and stores the result
; back through %aa.
; The addend is negated (%tmp2) and that negated value is what the fma
; consumes, so both run lines are expected to select vfnms.f16. The data flow
; is the same as in @fnms1, whose assertions the expected sequence below
; mirrors.
; NOTE(review): the assertions in this function were updated by hand to match
; the corrected fma operand; regenerate them with
; utils/update_llc_test_checks.py to confirm.
; NOTE(review): the definition carries readnone although the body stores
; through %aa; the attribute is kept exactly as found.
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %c = load half, half *%cc, align 2
  ; Negate the addend; this value must be the third fma operand, otherwise the
  ; function degenerates into a plain fma test.
  %tmp2 = fsub half -0.0, %c
  %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
  store half %tmp3, half *%aa, align 2
  ret void
}
|
|
|
|
; test_fma_const_fold: calls llvm.fma.f16(*%aa, 1.0, *%bb) and stores the
; result back through %aa.
; Both run lines expect the multiply by 1.0 to disappear, leaving a single
; vadd.f16 and no fused instruction.
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %aa, align 2
  %addend = load half, half* %bb, align 2
  ; Constant 1.0 as the second multiplicand.
  %res = call half @llvm.fma.f16(half %x, half 1.0, half %addend)
  store half %res, half* %aa, align 2
  ret void
}
|
|
|
|
; test_fma_canonicalize: calls llvm.fma.f16(2.0, *%aa, *%bb) with the constant
; as the first multiplicand, and stores the result back through %aa.
; Both run lines expect the constant to be materialised with vmov.f16 and used
; as the last operand of vfma.f16.
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vmov.f16 s4, #2.000000e+00
; CHECK-NEXT: vfma.f16 s2, s0, s4
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT: vstr.16 s2, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %aa, align 2
  %addend = load half, half* %bb, align 2
  ; Constant 2.0 deliberately placed in the first operand slot.
  %res = call half @llvm.fma.f16(half 2.0, half %x, half %addend)
  store half %res, half* %aa, align 2
  ret void
}
|
|
|
|
; fms1: calls llvm.fma.f16(-(*%a1), *%a2, *%a3), negating the first
; multiplicand, and stores the result back through %a1.
; Both run lines expect a single vfms.f16.
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negation is spelled as a subtraction from -0.0.
  %negx = fsub half -0.0, %x
  %res = call half @llvm.fma.f16(half %negx, half %y, half %acc)
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fms2: calls llvm.fma.f16(*%a2, -(*%a1), *%a3), negating the second
; multiplicand, and stores the result back through %a1.
; Both run lines expect a single vfms.f16.
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negation is spelled as a subtraction from -0.0.
  %negx = fsub half -0.0, %x
  %res = call half @llvm.fma.f16(half %y, half %negx, half %acc)
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fnma1: computes -(llvm.fma.f16(*%a1, *%a2, *%a3)), negating the whole fma
; result, and stores it back through %a1.
; Both run lines expect a single vfnma.f16.
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  %fused = call half @llvm.fma.f16(half %x, half %y, half %acc)
  ; Negation is spelled as a subtraction from -0.0.
  %res = fsub half -0.0, %fused
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fnma2: calls llvm.fma.f16(-(*%a1), *%a2, -(*%a3)), negating both the first
; multiplicand and the addend, and stores the result back through %a1.
; Both run lines expect a single vfnma.f16.
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negations are spelled as subtractions from -0.0.
  %negx = fsub half -0.0, %x
  %negacc = fsub half -0.0, %acc
  %res = call half @llvm.fma.f16(half %negx, half %y, half %negacc)
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fnms1: calls llvm.fma.f16(*%a1, *%a2, -(*%a3)), negating only the addend,
; and stores the result back through %a1.
; Both run lines expect a single vfnms.f16.
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negation is spelled as a subtraction from -0.0.
  %negacc = fsub half -0.0, %acc
  %res = call half @llvm.fma.f16(half %x, half %y, half %negacc)
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fnms2: computes -(llvm.fma.f16(-(*%a1), *%a2, *%a3)), negating the first
; multiplicand and then the whole fma result, and stores it back through %a1.
; Both run lines expect a single vfnms.f16.
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negations are spelled as subtractions from -0.0.
  %negx = fsub half -0.0, %x
  %fused = call half @llvm.fma.f16(half %negx, half %y, half %acc)
  %res = fsub half -0.0, %fused
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
; fnms3: computes -(llvm.fma.f16(*%a1, -(*%a2), *%a3)), negating the second
; multiplicand and then the whole fma result, and stores it back through %a1.
; Both run lines expect a single vfnms.f16.
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
  %x = load half, half* %a1, align 2
  %y = load half, half* %a2, align 2
  %acc = load half, half* %a3, align 2
  ; Negations are spelled as subtractions from -0.0.
  %negy = fsub half -0.0, %y
  %fused = call half @llvm.fma.f16(half %x, half %negy, half %acc)
  %res = fsub half -0.0, %fused
  store half %res, half* %a1, align 2
  ret void
}
|
|
|
|
|
|
; Half-precision fused multiply-add intrinsic called by the test functions in
; this file; takes three half operands and returns a half.
declare half @llvm.fma.f16(half, half, half) nounwind readnone
|