mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-28 07:49:01 -04:00

In addition to enabling ELFv2 homogeneous aggregate handling, LLVM support to pass array types directly also enables a performance enhancement. We can now pass (non-homogeneous) aggregates that fit fully in registers as direct integer arrays, using an element type to encode the alignment requirement (that would otherwise go to the "byval align" field). This is preferable since "byval" forces the back-end to write the aggregate out to the stack, even if it could be passed fully in registers. This is particularly annoying on ELFv2, if there is no parameter save area available, since we then need to allocate space on the callee's stack just to hold those aggregates. Note that to implement this optimization, this patch does not attempt to fully anticipate register allocation rules as (defined in the ABI and) implemented in the back-end. Instead, the patch is simply passing *any* aggregate passed by value using the array mechanism if its size is up to 64 bytes. This means that some of those will end up being passed in stack slots anyway, but the generated code shouldn't be any worse either. (*Large* aggregates remain passed using "byval" to enable optimized copying via memcpy etc.) llvm-svn: 213495
53 lines
1.2 KiB
C
53 lines
1.2 KiB
C
// RUN: %clang_cc1 -faltivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s

typedef short v2i16 __attribute__((vector_size (4)));
|
|
typedef short v3i16 __attribute__((vector_size (6)));
|
|
typedef short v4i16 __attribute__((vector_size (8)));
|
|
typedef short v6i16 __attribute__((vector_size (12)));
|
|
typedef short v8i16 __attribute__((vector_size (16)));
|
|
typedef short v16i16 __attribute__((vector_size (32)));
|
|
|
|
// Aggregate wrapping a single 32-byte vector.  The struct tag shares its
// name with the v16i16 typedef; tags live in a separate namespace in C, so
// the two do not conflict.
struct v16i16 { v16i16 x; };

// CHECK: define i32 @test_v2i16(i32 %x.coerce)
|
|
v2i16 test_v2i16(v2i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define i64 @test_v3i16(i64 %x.coerce)
|
|
v3i16 test_v3i16(v3i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define i64 @test_v4i16(i64 %x.coerce)
|
|
v4i16 test_v4i16(v4i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define <6 x i16> @test_v6i16(<6 x i16> %x)
|
|
v6i16 test_v6i16(v6i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define <8 x i16> @test_v8i16(<8 x i16> %x)
|
|
v8i16 test_v8i16(v8i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define void @test_v16i16(<16 x i16>* noalias sret %agg.result, <16 x i16>*)
|
|
v16i16 test_v16i16(v16i16 x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, [2 x i128] %x.coerce)
|
|
struct v16i16 test_struct_v16i16(struct v16i16 x)
|
|
{
|
|
return x;
|
|
}
|