teak-llvm/clang/test/CodeGen/lzcnt-builtins.c
Craig Topper fb5d9f2849 [X86] For lzcnt/tzcnt intrinsics use cttz/ctlz intrinsics with zero_undef flag set to false.
Previously we used a select and the zero_undef=true intrinsic. In -O2 this pattern will get optimized to zero_undef=false. But in -O0 this optimization won't happen. This results in a compare and cmov being wrapped around a tzcnt/lzcnt instruction.

By using the zero_undef=false intrinsic directly without the select, we can improve the -O0 codegen to just an lzcnt/tzcnt instruction.

Differential Revision: https://reviews.llvm.org/D52392

llvm-svn: 343126
2018-09-26 17:01:44 +00:00

35 lines
801 B
C

// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
#include <immintrin.h>
// Verifies that __lzcnt16 is emitted as a call to @llvm.ctlz.i16 whose second
// operand (the zero-is-undefined flag) is i1 false.
unsigned short test__lzcnt16(unsigned short __X)
{
  // The label directive pins the pattern below to this function's IR, so it
  // cannot be satisfied by a ctlz call generated for some other function.
  // CHECK-LABEL: @test__lzcnt16(
  // CHECK: @llvm.ctlz.i16(i16 %{{.*}}, i1 false)
  return __lzcnt16(__X);
}
// Verifies that __lzcnt32 is emitted as a call to @llvm.ctlz.i32 whose second
// operand (the zero-is-undefined flag) is i1 false.
unsigned int test_lzcnt32(unsigned int __X)
{
  // The label directive pins the pattern below to this function's IR; without
  // it the same i32 pattern could match a ctlz call from another function.
  // CHECK-LABEL: @test_lzcnt32(
  // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false)
  return __lzcnt32(__X);
}
// Verifies that __lzcnt64 is emitted as a call to @llvm.ctlz.i64 whose second
// operand (the zero-is-undefined flag) is i1 false.
unsigned long long test__lzcnt64(unsigned long long __X)
{
  // The label directive pins the pattern below to this function's IR; without
  // it the same i64 pattern could match a ctlz call from another function.
  // CHECK-LABEL: @test__lzcnt64(
  // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
  return __lzcnt64(__X);
}
// Verifies that _lzcnt_u32 is emitted as a call to @llvm.ctlz.i32 whose second
// operand (the zero-is-undefined flag) is i1 false.
unsigned int test_lzcnt_u32(unsigned int __X)
{
  // The label directive pins the pattern below to this function's IR; without
  // it the same i32 pattern could match a ctlz call from another function.
  // CHECK-LABEL: @test_lzcnt_u32(
  // CHECK: @llvm.ctlz.i32(i32 %{{.*}}, i1 false)
  return _lzcnt_u32(__X);
}
// Verifies that _lzcnt_u64 is emitted as a call to @llvm.ctlz.i64 whose second
// operand (the zero-is-undefined flag) is i1 false.
unsigned long long test__lzcnt_u64(unsigned long long __X)
{
  // The label directive pins the pattern below to this function's IR; without
  // it the same i64 pattern could match a ctlz call from another function.
  // CHECK-LABEL: @test__lzcnt_u64(
  // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
  return _lzcnt_u64(__X);
}