mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-28 07:49:01 -04:00

Summary: Before this patch, we computed the offsets in memory of args passed to GPU kernel functions by throwing all of the args into an LLVM struct. clang emits packed llvm structs basically whenever it feels like it, and packed structs have alignment 1. So we cannot rely on the llvm type's alignment matching the C++ type's alignment. This patch fixes our codegen so we always respect the clang types' alignments. Reviewers: rnk Subscribers: cfe-commits, tra Differential Revision: https://reviews.llvm.org/D22879 llvm-svn: 276927
37 lines
1.2 KiB
Plaintext
37 lines
1.2 KiB
Plaintext
// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
|
|
// RUN: FileCheck -check-prefix HOST -check-prefix CHECK %s
|
|
|
|
// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
|
|
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Packed struct holding a single short. It is embedded as a member of S
// below; presumably this packed member is what makes clang lower S to a
// packed LLVM struct -- confirm against the %struct.S type pattern.
struct U {
|
|
short x;
|
|
} __attribute__((packed));
|
|
|
|
// Kernel argument type under test: a pointer, a char, and a packed U, in that
// order. This file asserts that alignof(S) is 8 and expects clang to emit a
// packed LLVM struct for S, so the C++ alignment of S has to be respected
// independently of the LLVM type's alignment.
struct S {
|
|
int *ptr;
|
|
char a;
|
|
U u;
|
|
};
|
|
|
|
// Clang should generate a packed LLVM struct for S (denoted by the <>s),
|
|
// otherwise this test isn't interesting.
|
|
// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }>
|
|
|
|
// The argument offsets expected further down assume S is 8-byte aligned in
// C++; stop at compile time if that assumption does not hold.
static_assert(alignof(S) == 8, "Unexpected alignment.");
|
|
|
|
// HOST-LABEL: @_Z6kernelc1SPi
|
|
// Marshalled kernel args should be:
|
|
// 1. offset 0, width 1
|
|
// 2. offset 8 (because alignof(S) == 8), width 16
|
|
// 3. offset 24, width 8
|
|
// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 1, i64 0)
|
|
// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 16, i64 8)
|
|
// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
|
|
|
|
// DEVICE-LABEL: @_Z6kernelc1SPi
|
|
// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
|
|
// Empty kernel that exists only for its signature: a char, an S passed by
// value, and an int pointer. The host-side and device-side patterns above
// verify how these three arguments are marshalled and passed.
__global__ void kernel(char a, S s, int *b) {}
|