teak-llvm/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
David Green fb8c9a339a [ARM] Use isFMAFasterThanFMulAndFAdd for scalars as well as MVE vectors
This adds extra scalar handling to isFMAFasterThanFMulAndFAdd, allowing
the target independent code to handle more folds in more situations (for
example if the fast math flags are present, but the global
AllowFPOpFusion option isnt). It also splits apart the HasSlowFPVMLx
into HasSlowFPVFMx, to allow VFMA and VMLA to be controlled separately
if needed.

Differential Revision: https://reviews.llvm.org/D72139
2020-01-05 11:24:04 +00:00

430 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
; Check generated fp16 fused MAC and MLS.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
%2 = fadd half %1, %f3
store half %2, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r2]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r0]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r2]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f2, %f3
%2 = fsub half %f1, %1
store half %2, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
%2 = fsub half -0.0, %1
%3 = fsub half %2, %f3
store half %3, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
%2 = fsub half %1, %f3
store half %2, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE: @ %bb.0: @ %entry
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
entry:
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
%tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
store half %tmp1, half *%aa, align 2
ret void
}
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
%tmp2 = fsub half -0.0, %c
%tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
store half %tmp3, half *%aa, align 2
ret void
}
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
store half %ret, half *%aa, align 2
ret void
}
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vmov.f16 s4, #2.000000e+00
; CHECK-NEXT: vfma.f16 s2, s0, s4
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT: vstr.16 s2, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
store half %ret, half *%aa, align 2
ret void
}
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
store half %ret, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
store half %ret, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%n1 = fsub half -0.0, %f1
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%n1 = fsub half -0.0, %f1
%fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
%n = fsub half -0.0, %fma
store half %n, half *%a1, align 2
ret void
}
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%n2 = fsub half -0.0, %f2
%fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
declare half @llvm.fma.f16(half, half, half) nounwind readnone