mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-30 00:38:54 -04:00

The OpenCL runtime tracks the invoke function emitted for any block expression. Due to restrictions on blocks in OpenCL (v2.0 s6.12.5), it is always possible to know the block invoke function when emitting a call to a block expression or to the __enqueue_kernel builtin functions. Since __enqueue_kernel already has an argument for the invoke function, it is redundant to have an invoke function member in the LLVM block literal structure. This patch removes the invoke function from the LLVM block literal structure. It also removes the bitcast of the block invoke function to the generic block literal type, which is useless for OpenCL. This saves some space in the kernel argument and also eliminates some store instructions. Differential Revision: https://reviews.llvm.org/D43783 llvm-svn: 326937
59 lines
2.6 KiB
Common Lisp
59 lines
2.6 KiB
Common Lisp
// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s
|
|
|
|
// Minimal stand-in for ndrange_t: a struct with a single int member, used
// below as the ndrange argument of the enqueue_kernel calls.
typedef struct {
  int a;
} ndrange_t;
|
|
|
|
// Helper that stores id into out[id]. It is called from the block that
// test() assigns to the variable `block`.
void callee(long id, global long *out) {
|
|
out[id] = id;
|
|
}
|
|
|
|
// CHECK-LABEL: define amdgpu_kernel void @test
|
|
// Kernel that issues four enqueue_kernel calls, each with a different block.
// The expectations after this function look for one internal
// __test_block_invoke*_kernel definition per block, so the order of the
// blocks and the variables each one uses must stay as written.
kernel void test(global char *a, char b, global long *c, long d) {
|
|
queue_t default_queue;
|
|
unsigned flags = 0;
|
|
ndrange_t ndrange;
|
|
|
|
// Block 1: takes no parameters; reads b and writes through a.
enqueue_kernel(default_queue, flags, ndrange,
|
|
^(void) {
|
|
a[0] = b;
|
|
});
|
|
|
|
// Block 2: takes no parameters; uses a and b, and also reads d and writes
// through c.
enqueue_kernel(default_queue, flags, ndrange,
|
|
^(void) {
|
|
a[0] = b;
|
|
c[0] = d;
|
|
});
|
|
// Block 3: takes a local pointer parameter and writes through it as int.
// enqueue_kernel receives one extra argument (100) after the block;
// presumably this is the size of the local buffer for lp -- confirm against
// the OpenCL enqueue_kernel specification.
enqueue_kernel(default_queue, flags, ndrange,
|
|
^(local void *lp) {
|
|
a[0] = b;
|
|
c[0] = d;
|
|
((local int*)lp)[0] = 1;
|
|
}, 100);
|
|
|
|
// Block 4: stored in a block variable first; its body calls callee(d, c).
void (^block)(void) = ^{
|
|
callee(d, c);
|
|
};
|
|
|
|
// Enqueue the block through the variable rather than a block literal.
enqueue_kernel(default_queue, flags, ndrange, block);
|
|
}
|
|
|
|
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>)
|
|
// CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
|
|
// CHECK: entry:
|
|
// CHECK: %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
|
|
// CHECK: store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
|
|
// CHECK: %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
|
|
// CHECK: call void @__test_block_invoke(i8* %2)
|
|
// CHECK: ret void
|
|
// CHECK:}
|
|
|
|
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
|
|
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
|
|
|
|
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
|
|
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
|
|
|
|
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>)
|
|
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
|
|
|
|
// CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
|