mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-20 03:55:48 -04:00

Detailed description: After https://reviews.llvm.org/D59990 was submitted, several issues were discovered. Changes in common code were preserved, but the AMDGPU-specific part was reverted to keep the backend working correctly. The discovered issues were addressed in the following commits: https://reviews.llvm.org/D67662 https://reviews.llvm.org/D67101 https://reviews.llvm.org/D63953 https://reviews.llvm.org/D63731 This change brings back the AMDGPU-specific changes. Reviewed by: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D68635 llvm-svn: 374767
117 lines
4.2 KiB
LLVM
117 lines
4.2 KiB
LLVM
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
|
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
|
|
|
|
|
; DAGCombiner will transform:
|
|
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
|
|
; unless isFabsFree returns true
|
|
|
|
; s_fabs_fn_free reinterprets the i32 kernel argument as a float and passes
; it to the plain @fabs function (not the llvm.fabs intrinsic), then stores
; the result through %out.
; Expected code: the r600 run must show an operand wrapped in |...| with no
; AND before it, the default amdgcn run a scalar AND with 0x7fffffff, and
; the tonga run a clear of bit 31.
; FUNC-LABEL: {{^}}s_fabs_fn_free:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|

; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
; VI: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabs_fn_free(float addrspace(1)* %out, i32 %in) {
  %in.as.float = bitcast i32 %in to float
  %abs = call float @fabs(float %in.as.float)
  store float %abs, float addrspace(1)* %out
  ret void
}
|
|
|
|
; s_fabs_free is the intrinsic counterpart of the @fabs-based test: the i32
; kernel argument is bitcast to float, run through llvm.fabs.f32, and the
; result is stored through %out.
; Expected code: the r600 run must show an operand wrapped in |...| with no
; AND before it, the default amdgcn run a scalar AND with 0x7fffffff, and
; the tonga run a clear of bit 31.
; FUNC-LABEL: {{^}}s_fabs_free:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|

; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
; VI: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabs_free(float addrspace(1)* %out, i32 %in) {
  %in.as.float = bitcast i32 %in to float
  %abs = call float @llvm.fabs.f32(float %in.as.float)
  store float %abs, float addrspace(1)* %out
  ret void
}
|
|
|
|
; s_fabs_f32 applies llvm.fabs.f32 directly to a float kernel argument (no
; bitcast involved) and stores the result through %out.
; Expected code: the r600 run must show a PV or T<n> channel operand wrapped
; in |...|, the default amdgcn run a scalar AND with 0x7fffffff, and the
; tonga run a clear of bit 31.
; FUNC-LABEL: {{^}}s_fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
; VI: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) {
  %abs = call float @llvm.fabs.f32(float %in)
  store float %abs, float addrspace(1)* %out
  ret void
}
|
|
|
|
; fabs_v2f32 applies llvm.fabs.v2f32 to a <2 x float> kernel argument and
; stores the vector through %out.
; Expected code: one check per element, i.e. two |...|-wrapped operands in
; the r600 run and two s_and_b32 instructions in both amdgcn runs.
; FUNC-LABEL: {{^}}fabs_v2f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; GCN: s_and_b32
; GCN: s_and_b32
define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
  %abs.vec = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
  store <2 x float> %abs.vec, <2 x float> addrspace(1)* %out
  ret void
}
|
|
|
|
; fabs_v4f32 applies llvm.fabs.v4f32 to a <4 x float> kernel argument and
; stores the vector through %out.
; Expected code: one check per element, i.e. four |...|-wrapped operands in
; the r600 run and four s_and_b32 instructions in both amdgcn runs.
; FUNC-LABEL: {{^}}fabs_v4f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|

; GCN: s_and_b32
; GCN: s_and_b32
; GCN: s_and_b32
; GCN: s_and_b32
define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
  %abs.vec = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
  store <4 x float> %abs.vec, <4 x float> addrspace(1)* %out
  ret void
}
|
|
|
|
; fabs_fn_fold multiplies the absolute value of %in0, computed with the
; plain @fabs function rather than the llvm.fabs intrinsic, by %in1 and
; stores the product through %out.
; Expected code on both amdgcn runs: the two float arguments arrive in one
; s_load_dwordx2 (offset 0xb on the default run, 0x2c on tonga), no "and"
; follows it, and the multiply takes the first loaded register wrapped in
; |...| as an operand.
; The label below uses the FUNC prefix, as the other tests in this file do,
; so that the r600 run also anchors on this function.
; FUNC-LABEL: {{^}}fabs_fn_fold:
; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %fabs = call float @fabs(float %in0)
  %fmul = fmul float %fabs, %in1
  store float %fmul, float addrspace(1)* %out
  ret void
}
|
|
|
|
; fabs_fold multiplies llvm.fabs.f32(%in0) by %in1 and stores the product
; through %out.
; Expected code on both amdgcn runs: the two float arguments arrive in one
; s_load_dwordx2 (offset 0xb on the default run, 0x2c on tonga), no "and"
; follows it, and the multiply takes the first loaded register wrapped in
; |...| as an operand.
; FUNC-LABEL: {{^}}fabs_fold:
; SI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dwordx2 s{{\[}}[[ABS_VALUE:[0-9]+]]:[[MUL_VAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: and
; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[MUL_VAL]]
; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[ABS_VALUE]]|, [[V_MUL_VI]]
define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
  %abs = call float @llvm.fabs.f32(float %in0)
  %product = fmul float %abs, %in1
  store float %product, float addrspace(1)* %out
  ret void
}
|
|
|
|
; Integer sign-bit masking should be recognized as fabs again.
; bitpreserve_fabs_f32 clears the top bit of the float argument by hand
; (bitcast to i32, AND with 2147483647, bitcast back) and adds 1.0 to it.
; Both amdgcn runs must fold the masking into the add as a |...|-wrapped
; scalar operand.
; FUNC-LABEL: {{^}}bitpreserve_fabs_f32:
; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0
define amdgpu_kernel void @bitpreserve_fabs_f32(float addrspace(1)* %out, float %in) {
  %bits = bitcast float %in to i32
  %bits.nosign = and i32 %bits, 2147483647
  %abs = bitcast i32 %bits.nosign to float
  %sum = fadd float %abs, 1.0
  store float %sum, float addrspace(1)* %out
  ret void
}
|
|
|
|
; llvm.fabs intrinsic overloads used by the scalar and vector tests.
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone

; Plain (non-intrinsic) function named fabs, called by the *_fn_* tests.
declare float @fabs(float) readnone