mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-28 15:58:57 -04:00

Some HIP applications, e.g. TensorFlow 1.3, use amdgpu kernel attributes; however, these attributes are currently only allowed on OpenCL kernel functions. This patch allows amdgpu kernel attributes to be applied to CUDA/HIP __global__ functions. Differential Revision: https://reviews.llvm.org/D47958 llvm-svn: 334561
38 lines
1.8 KiB
Plaintext
38 lines
1.8 KiB
Plaintext
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
// RUN: -check-prefix=NAMD
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
// RUN: -verify -o - %s | FileCheck -check-prefix=NAMD %s
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Empty kernel annotated with amdgpu_flat_work_group_size(32, 64).
// The directive inside the body matches the emitted kernel definition and
// captures its attribute-group id as FLAT_WORK_GROUP_SIZE_32_64; the
// attribute-group directive near the end of this file consumes that capture.
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_waves_per_eu(2).
// The directive inside the body matches the emitted kernel definition and
// captures its attribute-group id as WAVES_PER_EU_2; the attribute-group
// directive near the end of this file consumes that capture.
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
__global__ void waves_per_eu_2() {
// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_num_sgpr(32).
// The directive inside the body matches the emitted kernel definition and
// captures its attribute-group id as NUM_SGPR_32; the attribute-group
// directive near the end of this file consumes that capture.
__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
__global__ void num_sgpr_32() {
// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]
}
// Empty kernel annotated with amdgpu_num_vgpr(64).
// The directive inside the body matches the emitted kernel definition and
// captures its attribute-group id as NUM_VGPR_64; the attribute-group
// directive near the end of this file consumes that capture.
__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
__global__ void num_vgpr_64() {
// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
}
|
|
// Make sure this is silently accepted on other targets.
// NAMD-NOT: "amdgpu-flat-work-group-size"
// NAMD-NOT: "amdgpu-waves-per-eu"
// NAMD-NOT: "amdgpu-num-vgpr"
// NAMD-NOT: "amdgpu-num-sgpr"

// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"