teak-llvm/llvm/test/CodeGen/X86/atomic-mi.ll
Craig Topper f7e548c076 Recommit r358211 "[X86] Use FILD/FIST to implement i64 atomic load on 32-bit targets with X87, but no SSE2"
With correct test checks this time.

If we have X87 but not SSE2, we can atomically load an i64 value into the significand of an 80-bit extended precision x87 register using fild. We can then use a fist instruction to convert it back to an i64 integer.

This matches what gcc and icc do for this case and removes an existing FIXME.

llvm-svn: 358214
2019-04-11 19:19:42 +00:00
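As a rough source-level sketch of the case this lowering covers (a hedged illustration only; the function name is made up, and it assumes an i686-class target built without SSE2 and an 8-byte-aligned std::atomic<int64_t>):

    #include <atomic>
    #include <cstdint>

    // On a 32-bit x86 target with X87 but no SSE2, this 8-byte atomic load is
    // the kind of access the fild/fist lowering described above is meant to
    // handle: the bits round-trip through the x87 significand and a stack slot.
    int64_t load_counter(const std::atomic<int64_t> &c) {
      return c.load(std::memory_order_acquire);
    }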

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix FAST_INC
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix X64 --check-prefix SLOW_INC
; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not 2. More precisely, it makes sure that the
; immediate is not first copied uselessly into a register.
; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back into that variable is done as a single instruction.
; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register, doing
; an add and storing the result back.
; The binary operations currently supported are add, and, or, xor.
; sub with an immediate is not included because it is translated into an
; addition of the negated immediate.
;
; We also check the same patterns:
; - For inc/dec.
; - For register instead of immediate operands.
; - For floating point operations.
; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.
; Please note that these operations do not require the lock prefix: only
; sequentially consistent stores require this kind of protection on X86.
; And even for seq_cst operations, llvm uses the xchg instruction which has
; an implicit lock prefix, so making it explicit is not required.
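; As a rough source-level sketch (illustrative only, not part of the checked IR),
; the add_32i pattern below corresponds approximately to:
;   std::atomic<int> x;
;   x.store(x.load(std::memory_order_acquire) + 2, std::memory_order_relaxed);
; (LLVM's 'monotonic' ordering corresponds to C++ 'relaxed'), which should
; compile down to a single 'addl $2, (mem)' on x86-64, as the checks below verify.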
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8:
; X64: # %bb.0:
; X64-NEXT: movb $42, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movb $42, (%eax)
; X32-NEXT: retl
store atomic i8 42, i8* %p release, align 1
ret void
}
define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16:
; X64: # %bb.0:
; X64-NEXT: movw $42, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movw $42, (%eax)
; X32-NEXT: retl
store atomic i16 42, i16* %p monotonic, align 2
ret void
}
define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32:
; X64: # %bb.0:
; X64-NEXT: movl $42, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl $42, (%eax)
; X32-NEXT: retl
; On 32 bits, there is an extra movl for each of those functions,
; which loads the pointer argument from the stack.
store atomic i32 42, i32* %p release, align 4
ret void
}
define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64:
; X64: # %bb.0:
; X64-NEXT: movq $42, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: movl $42, %ebx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
; These are implemented with a CAS loop on 32 bit architectures, and thus
; cannot be optimized in the same way as the others.
store atomic i64 42, i64* %p release, align 8
ret void
}
; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
; even on X64, one must use movabsq, which can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big:
; X64: # %bb.0:
; X64-NEXT: movabsq $100000000000, %rax # imm = 0x174876E800
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_64_big:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: movl $23, %ecx
; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB4_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB4_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
store atomic i64 100000000000, i64* %p monotonic, align 8
ret void
}
; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl $42, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl $42, %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
store atomic i32 42, i32* %p seq_cst, align 4
ret void
}
; ----- ADD -----
define void @add_8i(i8* %p) {
; X64-LABEL: add_8i:
; X64: # %bb.0:
; X64-NEXT: addb $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_8i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addb $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @add_8r(i8* %p, i8 %v) {
; X64-LABEL: add_8r:
; X64: # %bb.0:
; X64-NEXT: addb %sil, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_8r:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addb %al, (%ecx)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, %v
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @add_16i(i16* %p) {
; X64-LABEL: add_16i:
; X64: # %bb.0:
; X64-NEXT: addw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @add_16r(i16* %p, i16 %v) {
; X64-LABEL: add_16r:
; X64: # %bb.0:
; X64-NEXT: addw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_16r:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, %v
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @add_32i(i32* %p) {
; X64-LABEL: add_32i:
; X64: # %bb.0:
; X64-NEXT: addl $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addl $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, 2
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
define void @add_32r(i32* %p, i32 %v) {
; X64-LABEL: add_32r:
; X64: # %bb.0:
; X64-NEXT: addl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32r:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl %eax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, %v
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
; The following is a corner case where the load is added to itself. The pattern
; matching should not fold this. We only test with 32-bit add, but the same
; applies to other sizes and operations.
define void @add_32r_self(i32* %p) {
; X64-LABEL: add_32r_self:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: addl %eax, %eax
; X64-NEXT: movl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32r_self:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: addl %ecx, %ecx
; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, %1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @add_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: add_32r_ret_load:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: addl %eax, %esi
; X64-NEXT: movl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32r_ret_load:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl %eax, %edx
; X32-NEXT: movl %edx, (%ecx)
; X32-NEXT: retl
; There is more code here; we just don't want it to load from %p a second time.
%1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, %v
store atomic i32 %2, i32* %p monotonic, align 4
ret i32 %1
}
define void @add_64i(i64* %p) {
; X64-LABEL: add_64i:
; X64: # %bb.0:
; X64-NEXT: addq $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_64i:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $2, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB14_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB14_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'addq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @add_64r(i64* %p, i64 %v) {
; X64-LABEL: add_64r:
; X64: # %bb.0:
; X64-NEXT: addq %rsi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_64r:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl 12(%ebp), %ebx
; X32-NEXT: adcl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB15_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB15_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'addq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, %v
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @add_32i_seq_cst(i32* %p) {
; X64-LABEL: add_32i_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: addl $2, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32i_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: addl $2, %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
define void @add_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: add_32r_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: addl %esi, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: add_32r_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, %v
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- SUB -----
define void @sub_8r(i8* %p, i8 %v) {
; X64-LABEL: sub_8r:
; X64: # %bb.0:
; X64-NEXT: subb %sil, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_8r:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subb %al, (%ecx)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = sub i8 %1, %v
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @sub_16r(i16* %p, i16 %v) {
; X64-LABEL: sub_16r:
; X64: # %bb.0:
; X64-NEXT: subw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_16r:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, %v
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @sub_32r(i32* %p, i32 %v) {
; X64-LABEL: sub_32r:
; X64: # %bb.0:
; X64-NEXT: subl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_32r:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl %eax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 %1, %v
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
; The following is a corner case where the load is subtracted from itself. The pattern
; matching should not fold this. We only test with 32-bit sub, but the same
; applies to other sizes and operations.
define void @sub_32r_self(i32* %p) {
; X64-LABEL: sub_32r_self:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl $0, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_32r_self:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: movl $0, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 %1, %1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @sub_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: sub_32r_ret_load:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: subl %esi, %ecx
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_32r_ret_load:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, (%ecx)
; X32-NEXT: retl
; There is more code here; we just don't want it to load from %p a second time.
%1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 %1, %v
store atomic i32 %2, i32* %p monotonic, align 4
ret i32 %1
}
define void @sub_64r(i64* %p, i64 %v) {
; X64-LABEL: sub_64r:
; X64: # %bb.0:
; X64-NEXT: subq %rsi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_64r:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl 12(%ebp), %ebx
; X32-NEXT: sbbl 16(%ebp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB23_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB23_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'subq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = sub i64 %1, %v
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @sub_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: sub_32r_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: subl %esi, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: sub_32r_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 %1, %v
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- AND -----
define void @and_8i(i8* %p) {
; X64-LABEL: and_8i:
; X64: # %bb.0:
; X64-NEXT: andb $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_8i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p monotonic, align 1
%2 = and i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @and_8r(i8* %p, i8 %v) {
; X64-LABEL: and_8r:
; X64: # %bb.0:
; X64-NEXT: andb %sil, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_8r:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andb %al, (%ecx)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p monotonic, align 1
%2 = and i8 %1, %v
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @and_16i(i16* %p) {
; X64-LABEL: and_16i:
; X64: # %bb.0:
; X64-NEXT: andw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @and_16r(i16* %p, i16 %v) {
; X64-LABEL: and_16r:
; X64: # %bb.0:
; X64-NEXT: andw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_16r:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, %v
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @and_32i(i32* %p) {
; X64-LABEL: and_32i:
; X64: # %bb.0:
; X64-NEXT: andl $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_32i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andl $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = and i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @and_32r(i32* %p, i32 %v) {
; X64-LABEL: and_32r:
; X64: # %bb.0:
; X64-NEXT: andl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_32r:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andl %eax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = and i32 %1, %v
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @and_64i(i64* %p) {
; X64-LABEL: and_64i:
; X64: # %bb.0:
; X64-NEXT: andq $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_64i:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: andl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB31_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB31_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'andq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = and i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @and_64r(i64* %p, i64 %v) {
; X64-LABEL: and_64r:
; X64: # %bb.0:
; X64-NEXT: andq %rsi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_64r:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andl 16(%ebp), %ecx
; X32-NEXT: andl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB32_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB32_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'andq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = and i64 %1, %v
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @and_32i_seq_cst(i32* %p) {
; X64-LABEL: and_32i_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: andl $2, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_32i_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: andl $2, %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = and i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
define void @and_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: and_32r_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: and_32r_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = and i32 %1, %v
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- OR -----
define void @or_8i(i8* %p) {
; X64-LABEL: or_8i:
; X64: # %bb.0:
; X64-NEXT: orb $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_8i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orb $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p acquire, align 1
%2 = or i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @or_8r(i8* %p, i8 %v) {
; X64-LABEL: or_8r:
; X64: # %bb.0:
; X64-NEXT: orb %sil, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_8r:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orb %al, (%ecx)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p acquire, align 1
%2 = or i8 %1, %v
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @or_16i(i16* %p) {
; X64-LABEL: or_16i:
; X64: # %bb.0:
; X64-NEXT: orw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @or_16r(i16* %p, i16 %v) {
; X64-LABEL: or_16r:
; X64: # %bb.0:
; X64-NEXT: orw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_16r:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, %v
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @or_32i(i32* %p) {
; X64-LABEL: or_32i:
; X64: # %bb.0:
; X64-NEXT: orl $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_32i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orl $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = or i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @or_32r(i32* %p, i32 %v) {
; X64-LABEL: or_32r:
; X64: # %bb.0:
; X64-NEXT: orl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_32r:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl %eax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = or i32 %1, %v
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @or_64i(i64* %p) {
; X64-LABEL: or_64i:
; X64: # %bb.0:
; X64-NEXT: orq $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_64i:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB41_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB41_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'orq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = or i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @or_64r(i64* %p, i64 %v) {
; X64-LABEL: or_64r:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_64r:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orl 16(%ebp), %ecx
; X32-NEXT: orl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB42_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB42_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'orq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = or i64 %1, %v
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @or_32i_seq_cst(i32* %p) {
; X64-LABEL: or_32i_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: orl $2, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_32i_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: orl $2, %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = or i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
define void @or_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: or_32r_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: or_32r_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = or i32 %1, %v
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- XOR -----
define void @xor_8i(i8* %p) {
; X64-LABEL: xor_8i:
; X64: # %bb.0:
; X64-NEXT: xorb $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_8i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorb $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p acquire, align 1
%2 = xor i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @xor_8r(i8* %p, i8 %v) {
; X64-LABEL: xor_8r:
; X64: # %bb.0:
; X64-NEXT: xorb %sil, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_8r:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorb %al, (%ecx)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p acquire, align 1
%2 = xor i8 %1, %v
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @xor_16i(i16* %p) {
; X64-LABEL: xor_16i:
; X64: # %bb.0:
; X64-NEXT: xorw $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_16i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorw $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @xor_16r(i16* %p, i16 %v) {
; X64-LABEL: xor_16r:
; X64: # %bb.0:
; X64-NEXT: xorw %si, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_16r:
; X32: # %bb.0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorw %ax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, %v
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @xor_32i(i32* %p) {
; X64-LABEL: xor_32i:
; X64: # %bb.0:
; X64-NEXT: xorl $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_32i:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl $2, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = xor i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @xor_32r(i32* %p, i32 %v) {
; X64-LABEL: xor_32r:
; X64: # %bb.0:
; X64-NEXT: xorl %esi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_32r:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl %eax, (%ecx)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = xor i32 %1, %v
store atomic i32 %2, i32* %p release, align 4
ret void
}
define void @xor_64i(i64* %p) {
; X64-LABEL: xor_64i:
; X64: # %bb.0:
; X64-NEXT: xorq $2, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_64i:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl $2, %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB51_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB51_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'xorq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @xor_64r(i64* %p, i64 %v) {
; X64-LABEL: xor_64r:
; X64: # %bb.0:
; X64-NEXT: xorq %rsi, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_64r:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl 16(%ebp), %ecx
; X32-NEXT: xorl 12(%ebp), %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB52_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB52_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'xorq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, %v
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @xor_32i_seq_cst(i32* %p) {
; X64-LABEL: xor_32i_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl $2, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_32i_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: xorl $2, %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
define void @xor_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: xor_32r_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl %esi, %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: xor_32r_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, %v
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- INC -----
define void @inc_8(i8* %p) {
; FAST_INC-LABEL: inc_8:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: incb (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incb (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_8:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addb $1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 1
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @inc_16(i16* %p) {
; FAST_INC-LABEL: inc_16:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: incw (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incw (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_16:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addw $1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 1
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @inc_32(i32* %p) {
; FAST_INC-LABEL: inc_32:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: incl (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_32:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addl $1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, 1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
define void @inc_64(i64* %p) {
; FAST_INC-LABEL: inc_64:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: incq (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $1, %ebx
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB58_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB58_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_64:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addq $1, (%rdi)
; SLOW_INC-NEXT: retq
; We do not check X86-32 as it cannot do 'incq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 1
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @inc_32_seq_cst(i32* %p) {
; FAST_INC-LABEL: inc_32_seq_cst:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: movl (%rdi), %eax
; FAST_INC-NEXT: incl %eax
; FAST_INC-NEXT: xchgl %eax, (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: inc_32_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: incl %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: inc_32_seq_cst:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: movl (%rdi), %eax
; SLOW_INC-NEXT: addl $1, %eax
; SLOW_INC-NEXT: xchgl %eax, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- DEC -----
define void @dec_8(i8* %p) {
; FAST_INC-LABEL: dec_8:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: decb (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: decb (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_8:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addb $-1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = sub i8 %1, 1
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @dec_16(i16* %p) {
; FAST_INC-LABEL: dec_16:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: decw (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: decw (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_16:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addw $-1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, 1
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @dec_32(i32* %p) {
; FAST_INC-LABEL: dec_32:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: decl (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: decl (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_32:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addl $-1, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
define void @dec_64(i64* %p) {
; FAST_INC-LABEL: dec_64:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: decq (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl $-1, %ebx
; X32-NEXT: adcl $-1, %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB63_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB63_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_64:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: addq $-1, (%rdi)
; SLOW_INC-NEXT: retq
; We do not check X86-32 as it cannot do 'decq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = sub i64 %1, 1
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @dec_32_seq_cst(i32* %p) {
; FAST_INC-LABEL: dec_32_seq_cst:
; FAST_INC: # %bb.0:
; FAST_INC-NEXT: movl (%rdi), %eax
; FAST_INC-NEXT: decl %eax
; FAST_INC-NEXT: xchgl %eax, (%rdi)
; FAST_INC-NEXT: retq
;
; X32-LABEL: dec_32_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: decl %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
;
; SLOW_INC-LABEL: dec_32_seq_cst:
; SLOW_INC: # %bb.0:
; SLOW_INC-NEXT: movl (%rdi), %eax
; SLOW_INC-NEXT: addl $-1, %eax
; SLOW_INC-NEXT: xchgl %eax, (%rdi)
; SLOW_INC-NEXT: retq
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- NOT -----
define void @not_8(i8* %p) {
; X64-LABEL: not_8:
; X64: # %bb.0:
; X64-NEXT: notb (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: notb (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = xor i8 %1, -1
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @not_16(i16* %p) {
; X64-LABEL: not_16:
; X64: # %bb.0:
; X64-NEXT: notw (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: notw (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, -1
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @not_32(i32* %p) {
; X64-LABEL: not_32:
; X64: # %bb.0:
; X64-NEXT: notl (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: notl (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = xor i32 %1, -1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
define void @not_64(i64* %p) {
; X64-LABEL: not_64:
; X64: # %bb.0:
; X64-NEXT: notq (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: movl (%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: notl %ecx
; X32-NEXT: notl %ebx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB68_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB68_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'notq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, -1
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @not_32_seq_cst(i32* %p) {
; X64-LABEL: not_32_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: notl %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: not_32_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: notl %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, -1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
; ----- NEG -----
define void @neg_8(i8* %p) {
; X64-LABEL: neg_8:
; X64: # %bb.0:
; X64-NEXT: negb (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: neg_8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: negb (%eax)
; X32-NEXT: retl
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = sub i8 0, %1
store atomic i8 %2, i8* %p release, align 1
ret void
}
define void @neg_16(i16* %p) {
; X64-LABEL: neg_16:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: negl %eax
; X64-NEXT: movw %ax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: neg_16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl (%eax), %ecx
; X32-NEXT: negl %ecx
; X32-NEXT: movw %cx, (%eax)
; X32-NEXT: retl
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 0, %1
store atomic i16 %2, i16* %p release, align 2
ret void
}
define void @neg_32(i32* %p) {
; X64-LABEL: neg_32:
; X64: # %bb.0:
; X64-NEXT: negl (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: neg_32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: negl (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 0, %1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
}
define void @neg_64(i64* %p) {
; X64-LABEL: neg_64:
; X64: # %bb.0:
; X64-NEXT: negq (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: neg_64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: fildll (%esi)
; X32-NEXT: fistpll (%esp)
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: subl (%esp), %ebx
; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB73_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB73_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
; We do not check X86-32 as it cannot do 'negq'.
%1 = load atomic i64, i64* %p acquire, align 8
%2 = sub i64 0, %1
store atomic i64 %2, i64* %p release, align 8
ret void
}
define void @neg_32_seq_cst(i32* %p) {
; X64-LABEL: neg_32_seq_cst:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: negl %eax
; X64-NEXT: xchgl %eax, (%rdi)
; X64-NEXT: retq
;
; X32-LABEL: neg_32_seq_cst:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %ecx
; X32-NEXT: negl %ecx
; X32-NEXT: xchgl %ecx, (%eax)
; X32-NEXT: retl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 0, %1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}