mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-23 21:45:46 -04:00

Previously we used a select and the zero_undef=true intrinsic. In -O2 this pattern will get optimized to zero_undef=false. But in -O0 this optimization won't happen. This results in a compare and cmov being wrapped around a tzcnt/lzcnt instruction. By using the zero_undef=false intrinsic directly without the select, we can improve the -O0 codegen to just an lzcnt/tzcnt instruction. Differential Revision: https://reviews.llvm.org/D52392 llvm-svn: 343126
35 lines
801 B
C
35 lines
801 B
C
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s

#include <immintrin.h>

// Codegen test for __lzcnt16: the FileCheck pattern inside the body expects
// a call to the 16-bit llvm.ctlz intrinsic whose i1 flag operand is false
// (the zero_undef=false form), with no surrounding select.
unsigned short test__lzcnt16(unsigned short __X)
{
  // CHECK: @llvm.ctlz.i16(i16 %{{.*}}, i1 false)
  return __lzcnt16(__X);
}

// Codegen test for __lzcnt32: the FileCheck pattern inside the body expects
// a call to the 32-bit llvm.ctlz intrinsic whose i1 flag operand is false
// (the zero_undef=false form), with no surrounding select.
unsigned int test_lzcnt32(unsigned int __X)
{
  // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false)
  return __lzcnt32(__X);
}

// Codegen test for __lzcnt64: the FileCheck pattern inside the body expects
// a call to the 64-bit llvm.ctlz intrinsic whose i1 flag operand is false
// (the zero_undef=false form), with no surrounding select.
unsigned long long test__lzcnt64(unsigned long long __X)
{
  // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
  return __lzcnt64(__X);
}

// Codegen test for _lzcnt_u32: the FileCheck pattern inside the body expects
// a call to the 32-bit llvm.ctlz intrinsic whose i1 flag operand is false
// (the zero_undef=false form), with no surrounding select.
unsigned int test_lzcnt_u32(unsigned int __X)
{
  // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false)
  return _lzcnt_u32(__X);
}

// Codegen test for _lzcnt_u64: the FileCheck pattern inside the body expects
// a call to the 64-bit llvm.ctlz intrinsic whose i1 flag operand is false
// (the zero_undef=false form), with no surrounding select.
unsigned long long test__lzcnt_u64(unsigned long long __X)
{
  // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
  return _lzcnt_u64(__X);
}