mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-20 03:55:48 -04:00

There are a few different issues, mostly stemming from using generation based checks for anything instead of subtarget features. Stop adding flat-address-space as a feature for HSA, as it should only be a device property. This was incorrectly allowing flat instructions to select for SI. Increase the default generation for HSA to avoid the encoding error when emitting objects. This has some other side effects from various checks which probably should be separate subtarget features (in the cost model and for dealing with the DS offset folding issue). Partial fix for bug 41070. It should probably be an error to try using amdhsa without flat support. llvm-svn: 356347
1438 lines
106 KiB
LLVM
1438 lines
106 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; FIXME: Manually added checks for metadata nodes at bottom
|
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s
|
|
; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s
|
|
|
|
define amdgpu_kernel void @kern_noargs() {
|
|
; HSA-LABEL: @kern_noargs(
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noargs(
|
|
; MESA-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
|
|
; HSA-LABEL: @kern_i8(
|
|
; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i8(
|
|
; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
|
|
; HSA-LABEL: @kern_i16(
|
|
; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i16(
|
|
; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_f16(half %arg) #0 {
|
|
; HSA-LABEL: @kern_f16(
|
|
; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; HSA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_f16(
|
|
; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; MESA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store half %arg, half addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
|
|
; HSA-LABEL: @kern_zeroext_i8(
|
|
; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_zeroext_i8(
|
|
; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
|
|
; HSA-LABEL: @kern_zeroext_i16(
|
|
; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_zeroext_i16(
|
|
; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
|
|
; HSA-LABEL: @kern_signext_i8(
|
|
; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_signext_i8(
|
|
; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i8 %arg, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
|
|
; HSA-LABEL: @kern_signext_i16(
|
|
; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_signext_i16(
|
|
; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i16 %arg, i16 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
|
|
; HSA-LABEL: @kern_i8_i8(
|
|
; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i8_i8(
|
|
; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef, align 1
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef, align 1
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
|
|
; HSA-LABEL: @kern_v3i8(
|
|
; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
|
|
; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v3i8(
|
|
; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
|
|
; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i24(i24 %arg0) {
|
|
; HSA-LABEL: @kern_i24(
|
|
; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; HSA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i24(
|
|
; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
|
|
; MESA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i24 %arg0, i24 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i32(i32 %arg0) {
|
|
; HSA-LABEL: @kern_i32(
|
|
; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i32(
|
|
; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_f32(float %arg0) {
|
|
; HSA-LABEL: @kern_f32(
|
|
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_f32(
|
|
; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store float %arg0, float addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
|
|
; HSA-LABEL: @kern_v3i32(
|
|
; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v3i32(
|
|
; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
|
|
; HSA-LABEL: @kern_v8i32(
|
|
; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v8i32(
|
|
; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <8 x i32> %arg, <8 x i32> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
|
|
; HSA-LABEL: @kern_v8i64(
|
|
; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v8i64(
|
|
; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <8 x i64> %arg, <8 x i64> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
|
|
; HSA-LABEL: @kern_v16i64(
|
|
; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
|
|
; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_v16i64(
|
|
; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
|
|
; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store <16 x i64> %arg, <16 x i64> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
|
|
; HSA-LABEL: @kern_i32_v3i32(
|
|
; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
|
|
; HSA-NEXT: [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_i32_v3i32(
|
|
; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
|
|
; MESA-NEXT: [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4
|
|
ret void
|
|
}
|
|
|
|
%struct.a = type { i32, i8, [4 x i8] }
|
|
%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
|
|
|
|
define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
|
|
; HSA-LABEL: @kern_struct_a(
|
|
; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_struct_a(
|
|
; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store %struct.a %arg0, %struct.a addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
|
|
; HSA-LABEL: @kern_struct_b_packed(
|
|
; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_struct_b_packed(
|
|
; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
|
|
; HSA-LABEL: @kern_implicit_arg_num_bytes(
|
|
; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_implicit_arg_num_bytes(
|
|
; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg0, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
|
|
; HSA-LABEL: @kernel_implicitarg_no_struct_align(
|
|
; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kernel_implicitarg_no_struct_align(
|
|
; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg1, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
|
|
; HSA-LABEL: @kern_lds_ptr(
|
|
; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
|
|
; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_lds_ptr(
|
|
; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
|
|
; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 0, i32 addrspace(3)* %lds, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
|
|
; HSA-LABEL: @kern_lds_ptr_si(
|
|
; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
|
|
; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_lds_ptr_si(
|
|
; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 0, i32 addrspace(3)* %lds, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
store volatile i8 %arg3, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_v3i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_v3i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i16(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i16(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i16 %arg1, i16 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
store volatile i1 %arg2, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i1 %arg1, i1 addrspace(1)* undef
|
|
store volatile i1 %arg2, i1 addrspace(1)* undef
|
|
store volatile i1 %arg3, i1 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_v3i1(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_v3i1(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_i1_i16(
|
|
; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i1_i16(
|
|
; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i1 %arg0, i1 addrspace(1)* undef
|
|
store volatile i16 %arg1, i16 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
|
|
; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
|
|
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
|
|
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
|
|
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
|
|
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
|
|
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
|
|
; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
|
|
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
|
|
; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
|
|
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
|
|
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
|
|
; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
|
|
; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
|
|
; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 %arg0, i8 addrspace(1)* undef
|
|
store volatile i8 %arg1, i8 addrspace(1)* undef
|
|
store volatile i8 %arg2, i8 addrspace(1)* undef
|
|
store volatile i8 %arg3, i8 addrspace(1)* undef
|
|
store volatile i8 %arg5, i8 addrspace(1)* undef
|
|
store volatile i8 %arg6, i8 addrspace(1)* undef
|
|
store volatile i8 %arg7, i8 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
|
|
; HSA-LABEL: @kern_realign_f16_f16(
|
|
; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
|
|
; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_realign_f16_f16(
|
|
; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
|
|
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
|
|
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
|
|
; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
|
|
; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile half %arg0, half addrspace(1)* undef
|
|
store volatile half %arg1, half addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
|
|
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
|
|
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
|
|
; HSA-LABEL: @kern_nonnull_global_ptr(
|
|
; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_nonnull_global_ptr(
|
|
; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
|
|
; HSA-LABEL: @kern_align32_global_ptr(
|
|
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_align32_global_ptr(
|
|
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
|
|
; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
|
|
; HSA-LABEL: @kern_noalias_global_ptr(
|
|
; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noalias_global_ptr(
|
|
; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
|
|
; HSA-LABEL: @kern_noalias_global_ptr_x2(
|
|
; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @kern_noalias_global_ptr_x2(
|
|
; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef
|
|
store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i8} %in, 0
|
|
%elt1 = extractvalue {i8, i8} %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i8 %elt1, i8 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
|
|
; HSA-LABEL: @struct_i8_i16_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @struct_i8_i16_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue {i8, i16} %in, 0
|
|
%elt1 = extractvalue {i8, i16} %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i16 %elt1, i16 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
|
|
; HSA-LABEL: @array_2xi8_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi8_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i8] %in, 0
|
|
%elt1 = extractvalue [2 x i8] %in, 1
|
|
store volatile i8 %elt0, i8 addrspace(1)* null, align 4
|
|
store volatile i8 %elt1, i8 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
|
|
; HSA-LABEL: @array_2xi1_arg(
|
|
; HSA-NEXT: entry:
|
|
; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
|
|
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
|
|
; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @array_2xi1_arg(
|
|
; MESA-NEXT: entry:
|
|
; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
|
|
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
|
|
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
|
|
; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
|
|
; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
|
|
; MESA-NEXT: ret void
|
|
;
|
|
entry:
|
|
%elt0 = extractvalue [2 x i1] %in, 0
|
|
%elt1 = extractvalue [2 x i1] %in, 1
|
|
store volatile i1 %elt0, i1 addrspace(1)* null, align 4
|
|
store volatile i1 %elt1, i1 addrspace(1)* null, align 4
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
|
|
; HSA-LABEL: @only_empty_struct(
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @only_empty_struct(
|
|
; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: ret void
|
|
;
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
|
|
; HSA-LABEL: @empty_struct_with_other(
|
|
; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
|
|
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
|
|
; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; HSA-NEXT: ret void
|
|
;
|
|
; MESA-LABEL: @empty_struct_with_other(
|
|
; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
|
|
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
|
|
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
|
|
; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
|
|
; MESA-NEXT: ret void
|
|
;
|
|
store i32 %arg1, i32 addrspace(1)* undef
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind "target-cpu"="kaveri" }
|
|
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
|
|
attributes #2 = { nounwind "target-cpu"="tahiti" }
|
|
|
|
; GCN: 0 = !{}
|
|
; GCN: !1 = !{i64 42}
|
|
; GCN: !2 = !{i64 128}
|
|
; GCN: !3 = !{i64 1024}
|