teak-llvm/llvm/test/CodeGen/X86/statepoint-no-realign-stack.ll
Philip Reames 511be2a158 [Statepoints] Fix overalignment of loads in no-realign-stack functions
This really should have been part of 366765.  For some reason, I forgot to handle the corresponding load side, and the readable test cases (using deopt vs statepoints) turned out to be overly reduced.  Oops.

As seen in the test change, the problem was that we were using a load with alignment expectations rather than the unaligned variant when the stack alignment was less than that prefered type alignment.

llvm-svn: 367718
2019-08-02 20:17:37 +00:00

108 lines
4.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=skylake < %s | FileCheck %s
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Shows a case where we spill a 32 byte value onto a stack which is only
; 16 byte aligned. With stack realignment, we can use an aligned spill slot
; (if we think it's profitable), but without realignment, using a stack
; slot which is 32 byte aligned or a store which expects 32 byte alignment
; is incorrect.
declare void @foo()
define void @can_realign(<8 x i32>* %p) {
; CHECK-LABEL: can_realign:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq foo
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
%val = load <8 x i32>, <8 x i32>* %p, align 32
call void @foo() ["deopt" (<8 x i32> %val)]
ret void
}
define void @no_realign(<8 x i32>* %p) "no-realign-stack" {
; CHECK-LABEL: no_realign:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovups %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq foo
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
%val = load <8 x i32>, <8 x i32>* %p, align 32
call void @foo() ["deopt" (<8 x i32> %val)]
ret void
}
;; Next batch are similiar to the above, but require a reload of the
;; spilled value as well.
define <4 x i8 addrspace(1)*> @spillfill_can_realign(<4 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
; CHECK-LABEL: spillfill_can_realign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: vmovaps (%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
entry:
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <4 x i8 addrspace(1)*> %obj)
%obj.relocated = call coldcc <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
ret <4 x i8 addrspace(1)*> %obj.relocated
}
define <4 x i8 addrspace(1)*> @spillfill_no_realign(<4 x i8 addrspace(1)*> %obj) "no-realign-stack" gc "statepoint-example" {
; CHECK-LABEL: spillfill_no_realign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: vmovups %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq do_safepoint
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: vmovups (%rsp), %ymm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <4 x i8 addrspace(1)*> %obj)
%obj.relocated = call coldcc <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
ret <4 x i8 addrspace(1)*> %obj.relocated
}
declare void @do_safepoint()
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
declare <4 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v4p1i8(token, i32, i32)