teak-llvm/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Alex Bradbury 299d690a50 [RISCV] Custom-legalise 32-bit variable shifts on RV64
The previous DAG combiner-based approach had an issue with infinite loops
between the target-dependent and target-independent combiner logic (see
PR40333). Although this was worked around in rL351806, the combiner-based
approach is still potentially brittle and can fail to select the 32-bit shift
variant when profitable to do so, as demonstrated in the pr40333.ll test case.

This patch instead introduces target-specific SelectionDAG nodes for
SHLW/SRLW/SRAW and custom-lowers variable i32 shifts to them. pr40333.ll is a
good example of how this approach can improve codegen.

This adds DAG combine that does SimplifyDemandedBits on the operands (only
lower 32-bits of first operand and lower 5 bits of second operand are read).
This seems better than implementing SimplifyDemandedBitsForTargetNode as there
is no guarantee that would be called (and it's not for e.g. the anyext return
test cases). Also implements ComputeNumSignBitsForTargetNode.

There are codegen changes in atomic-rmw.ll and atomic-cmpxchg.ll but the new
instruction sequences are semantically equivalent.

Differential Revision: https://reviews.llvm.org/D57085

llvm-svn: 352169
2019-01-25 05:04:00 +00:00

1917 lines
71 KiB
C++

//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RISCV::X2);
for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
setLoadExtAction(N, XLenVT, MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
setOperationAction(ISD::SELECT, XLenVT, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (Subtarget.is64Bit()) {
setTargetDAGCombine(ISD::ANY_EXTEND);
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
}
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
setOperationAction(ISD::MULHU, XLenVT, Expand);
setOperationAction(ISD::SDIV, XLenVT, Expand);
setOperationAction(ISD::UDIV, XLenVT, Expand);
setOperationAction(ISD::SREM, XLenVT, Expand);
setOperationAction(ISD::UREM, XLenVT, Expand);
}
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
setOperationAction(ISD::BSWAP, XLenVT, Expand);
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
setOperationAction(ISD::CTPOP, XLenVT, Expand);
ISD::CondCode FPCCToExtend[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ,
ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
ISD::SETGT, ISD::SETGE, ISD::SETNE};
ISD::NodeType FPOpToExtend[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
for (auto CC : FPCCToExtend)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
for (auto Op : FPOpToExtend)
setOperationAction(Op, MVT::f32, Expand);
}
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
for (auto CC : FPCCToExtend)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto Op : FPOpToExtend)
setOperationAction(Op, MVT::f64, Expand);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else {
setMaxAtomicSizeInBitsSupported(0);
}
setBooleanContents(ZeroOrOneBooleanContent);
// Function alignments (log2).
unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
// Effectively disable jump table generation.
setMinimumJumpTableEntries(INT_MAX);
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
return VT.changeVectorElementTypeToInteger();
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
case Intrinsic::riscv_masked_atomicrmw_add_i32:
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
case Intrinsic::riscv_masked_atomicrmw_max_i32:
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
case Intrinsic::riscv_masked_cmpxchg_i32:
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 4;
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
}
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (!AM.HasBaseReg) // allow "r+i".
break;
return false; // disallow "r+r" or "r+r+i".
default:
return false;
}
return true;
}
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
return false;
unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
unsigned DestBits = DstTy->getPrimitiveSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
!SrcVT.isInteger() || !DstVT.isInteger())
return false;
unsigned SrcBits = SrcVT.getSizeInBits();
unsigned DestBits = DstVT.getSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Zexts are free if they can be combined with a load.
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
(Subtarget.is64Bit() && MemVT == MVT::i32)) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
}
return TargetLowering::isZExtFree(Val, VT2);
}
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
switch (CC) {
default:
break;
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETUGT:
case ISD::SETULE:
CC = ISD::getSetCCSwappedOperands(CC);
std::swap(LHS, RHS);
break;
}
}
// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unsupported CondCode");
case ISD::SETEQ:
return RISCV::BEQ;
case ISD::SETNE:
return RISCV::BNE;
case ISD::SETLT:
return RISCV::BLT;
case ISD::SETGE:
return RISCV::BGE;
case ISD::SETULT:
return RISCV::BLTU;
case ISD::SETUGE:
return RISCV::BGEU;
}
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
case ISD::GlobalAddress:
return lowerGlobalAddress(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
}
}
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
if (isPositionIndependent())
report_fatal_error("Unable to lowerGlobalAddress");
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
SDValue MNLo =
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
DAG.getConstant(Offset, DL, XLenVT));
return MNLo;
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = N->getBlockAddress();
int64_t Offset = N->getOffset();
if (isPositionIndependent())
report_fatal_error("Unable to lowerBlockAddress");
SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
SDValue MNLo =
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
return MNLo;
}
SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
const Constant *CPA = N->getConstVal();
int64_t Offset = N->getOffset();
unsigned Alignment = N->getAlignment();
if (!isPositionIndependent()) {
SDValue CPAHi =
DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
SDValue CPALo =
DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
SDValue MNLo =
SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
return MNLo;
} else {
report_fatal_error("Unable to lowerConstantPool");
}
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
SDValue FalseV = Op.getOperand(2);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
// If the result type is XLenVT and CondV is the output of a SETCC node
// which also operated on XLenVT inputs, then merge the SETCC node into the
// lowered RISCVISD::SELECT_CC to take advantage of the integer
// compare+branch instructions. i.e.:
// (select (setcc lhs, rhs, cc), truev, falsev)
// -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
CondV.getOperand(0).getSimpleValueType() == XLenVT) {
SDValue LHS = CondV.getOperand(0);
SDValue RHS = CondV.getOperand(1);
auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
ISD::CondCode CCVal = CC->get();
normaliseSetCC(LHS, RHS, CCVal);
SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}
// Otherwise:
// (select condv, truev, falsev)
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy(MF.getDataLayout()));
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
unsigned FrameReg = RI.getFrameRegister(MF);
int XLenInBytes = Subtarget.getXLen() / 8;
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
while (Depth--) {
int Offset = -(XLenInBytes * 2);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, DL));
FrameAddr =
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
}
return FrameAddr;
}
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
MVT XLenVT = Subtarget.getXLenVT();
int XLenInBytes = Subtarget.getXLen() / 8;
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
int Off = -XLenInBytes;
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return the value of the return address register, marking it an implicit
// live-in.
unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
case ISD::SHL:
return RISCVISD::SLLW;
case ISD::SRA:
return RISCVISD::SRAW;
case ISD::SRL:
return RISCVISD::SRLW;
}
}
// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later one because the fact the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return value.
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDLoc DL(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom type legalize this operation!");
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() == ISD::Constant)
return;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
}
}
// Returns true if the given node is an sdiv, udiv, or urem with non-constant
// operands.
static bool isVariableSDivUDivURem(SDValue Val) {
switch (Val.getOpcode()) {
default:
return false;
case ISD::SDIV:
case ISD::UDIV:
case ISD::UREM:
return Val.getOperand(0).getOpcode() != ISD::Constant &&
Val.getOperand(1).getOpcode() != ISD::Constant;
}
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
break;
case ISD::ANY_EXTEND: {
// If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
// in order to increase the chance of being able to select the
// divw/divuw/remuw instructions.
SDValue Src = N->getOperand(0);
if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
break;
if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
break;
SDLoc DL(N);
// Don't add the new node to the DAGCombiner worklist, in order to avoid
// an infinite cycle due to SimplifyDemandedBits converting the
// SIGN_EXTEND back to ANY_EXTEND.
return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
false);
}
case RISCVISD::SplitF64: {
// If the input to SplitF64 is just BuildPairF64 then the operation is
// redundant. Instead, use BuildPairF64's operands directly.
SDValue Op0 = N->getOperand(0);
if (Op0->getOpcode() != RISCVISD::BuildPairF64)
break;
return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
return SDValue();
break;
}
}
return SDValue();
}
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
return 33;
}
return 1;
}
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
unsigned LoReg = MI.getOperand(0).getReg();
unsigned HiReg = MI.getOperand(1).getReg();
unsigned SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
RI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
MachineMemOperand::MOLoad, 8, 8);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMO);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
"Unexpected instruction");
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
unsigned DstReg = MI.getOperand(0).getReg();
unsigned LoReg = MI.getOperand(1).getReg();
unsigned HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
MachineMemOperand::MOStore, 8, 8);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMO);
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
break;
case RISCV::BuildPairF64Pseudo:
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
return emitSplitF64Pseudo(MI, BB);
}
// To "insert" a SELECT instruction, we actually have to insert the triangle
// control-flow pattern. The incoming instruction knows the destination vreg
// to set, the condition code register to branch on, the true/false values to
// select between, and the condcode to use to select the appropriate branch.
//
// We produce the following control flow:
// HeadMBB
// | \
// | IfFalseMBB
// | /
// TailMBB
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator I = ++BB->getIterator();
MachineBasicBlock *HeadMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(I, IfFalseMBB);
F->insert(I, TailMBB);
// Move all remaining instructions to TailMBB.
TailMBB->splice(TailMBB->begin(), HeadMBB,
std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
// Set the successors for HeadMBB.
HeadMBB->addSuccessor(IfFalseMBB);
HeadMBB->addSuccessor(TailMBB);
// Insert appropriate branch.
unsigned LHS = MI.getOperand(1).getReg();
unsigned RHS = MI.getOperand(2).getReg();
auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
BuildMI(HeadMBB, DL, TII.get(Opcode))
.addReg(LHS)
.addReg(RHS)
.addMBB(TailMBB);
// IfFalseMBB just falls through to TailMBB.
IfFalseMBB->addSuccessor(TailMBB);
// %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
MI.getOperand(0).getReg())
.addReg(MI.getOperand(4).getReg())
.addMBB(HeadMBB)
.addReg(MI.getOperand(5).getReg())
.addMBB(IfFalseMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return TailMBB;
}
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
static const MCPhysReg ArgGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
MVT ValVT2, MVT LocVT2,
ISD::ArgFlagsTy ArgFlags2) {
unsigned XLenInBytes = XLen / 8;
if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
VA1.getLocVT(), CCValAssign::Full));
} else {
// Both halves must be passed on the stack, with proper alignment.
unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
State.addLoc(
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
State.AllocateStack(XLenInBytes, StackAlign),
VA1.getLocVT(), CCValAssign::Full));
State.addLoc(CCValAssign::getMem(
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
CCValAssign::Full));
return false;
}
if (unsigned Reg = State.AllocateReg(ArgGPRs)) {
// The second half can also be passed via register.
State.addLoc(
CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
} else {
// The second half is passed via the stack, without additional alignment.
State.addLoc(CCValAssign::getMem(
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
CCValAssign::Full));
}
return false;
}
// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
if (ValVT == MVT::f32) {
LocVT = MVT::i32;
LocInfo = CCValAssign::BCvt;
}
// Any return value split in to more than two values can't be returned
// directly.
if (IsRet && ValNo > 1)
return true;
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
// be used regardless of whether the original argument was split during
// legalisation or not. The argument will not be passed by registers if the
// original type is larger than 2*XLEN, so the register alignment rule does
// not apply.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
// Skip 'odd' register if necessary.
if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
State.AllocateReg(ArgGPRs);
}
SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
State.getPendingArgFlags();
assert(PendingLocs.size() == PendingArgFlags.size() &&
"PendingLocs and PendingArgFlags out of sync");
// Handle passing f64 on RV32D with a soft float ABI.
if (XLen == 32 && ValVT == MVT::f64) {
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
"Can't lower f64 if it is split");
// Depending on available argument GPRS, f64 may be passed in a pair of
// GPRs, split between a GPR and the stack, or passed completely on the
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
unsigned Reg = State.AllocateReg(ArgGPRs);
LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, 8);
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
if (!State.AllocateReg(ArgGPRs))
State.AllocateStack(4, 4);
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
// Split arguments might be passed indirectly, so keep track of the pending
// values.
if (ArgFlags.isSplit() || !PendingLocs.empty()) {
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
PendingLocs.push_back(
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
PendingArgFlags.push_back(ArgFlags);
if (!ArgFlags.isSplitEnd()) {
return false;
}
}
// If the split argument only had two elements, it should be passed directly
// in registers or on the stack.
if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
// Apply the normal calling convention rules to the first half of the
// split argument.
CCValAssign VA = PendingLocs[0];
ISD::ArgFlagsTy AF = PendingArgFlags[0];
PendingLocs.clear();
PendingArgFlags.clear();
return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
ArgFlags);
}
// Allocate to a register if possible, or else a stack slot.
unsigned Reg = State.AllocateReg(ArgGPRs);
unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
// If we reach this point and PendingLocs is non-empty, we must be at the
// end of a split argument that must be passed indirectly.
if (!PendingLocs.empty()) {
assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
for (auto &It : PendingLocs) {
if (Reg)
It.convertToReg(Reg);
else
It.convertToMem(StackOffset);
State.addLoc(It);
}
PendingLocs.clear();
PendingArgFlags.clear();
return false;
}
assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
if (ValVT == MVT::f32) {
LocVT = MVT::f32;
LocInfo = CCValAssign::Full;
}
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
void RISCVTargetLowering::analyzeInputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
Type *ArgTy = nullptr;
if (IsRet)
ArgTy = FType->getReturnType();
else if (Ins[i].isOrigArg())
ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n');
llvm_unreachable(nullptr);
}
}
}
void RISCVTargetLowering::analyzeOutputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
CallLoweringInfo *CLI) const {
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n");
llvm_unreachable(nullptr);
}
}
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
}
return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
if (VA.getLocInfo() == CCValAssign::Indirect)
return Val;
return convertLocVTToValVT(DAG, Val, VA, DL);
}
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL) {
EVT LocVT = VA.getLocVT();
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT LocVT = VA.getLocVT();
EVT ValVT = VA.getValVT();
EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
VA.getLocMemOffset(), /*Immutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val;
ISD::LoadExtType ExtType;
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
case CCValAssign::Indirect:
ExtType = ISD::NON_EXTLOAD;
break;
}
Val = DAG.getExtLoad(
ExtType, DL, LocVT, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
return Val;
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL) {
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
"Unexpected VA");
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
if (VA.isMemLoc()) {
// f64 is passed on the stack.
int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(MVT::f64, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
}
assert(VA.isRegLoc() && "Expected register VA assignment");
unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
if (VA.getLocReg() == RISCV::X17) {
// Second half of f64 is passed on the stack.
int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
switch (CallConv) {
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
break;
}
MachineFunction &MF = DAG.getMachineFunction();
const Function &Func = MF.getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.arg_empty())
report_fatal_error(
"Functions with the interrupt attribute cannot have arguments!");
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
report_fatal_error(
"Function interrupt attribute argument not supported!");
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT XLenVT = Subtarget.getXLenVT();
unsigned XLenInBytes = Subtarget.getXLen() / 8;
// Used with vargs to acumulate store chains.
std::vector<SDValue> OutChains;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
else if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
if (VA.getLocInfo() == CCValAssign::Indirect) {
// If the original argument was split and passed by reference (e.g. i128
// on RV32), we need to load all parts of it here (using the same
// address).
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
unsigned ArgIndex = Ins[i].OrigArgIndex;
assert(Ins[i].PartOffset == 0);
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
unsigned PartOffset = Ins[i + 1].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
DAG.getIntPtrConstant(PartOffset, DL));
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
}
continue;
}
InVals.push_back(ArgValue);
}
if (IsVarArg) {
ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Offset of the first variable argument from stack pointer, and size of
// the vararg save area. For now, the varargs save area is either zero or
// large enough to hold a0-a7.
int VaArgOffset, VarArgsSaveSize;
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
if (ArgRegs.size() == Idx) {
VaArgOffset = CCInfo.getNextStackOffset();
VarArgsSaveSize = 0;
} else {
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
VaArgOffset = -VarArgsSaveSize;
}
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
RVFI->setVarArgsFrameIndex(FI);
// If saving an odd number of registers then create an extra stack slot to
// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
// offsets to even-numbered registered remain 2*XLEN-aligned.
if (Idx % 2) {
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
true);
VarArgsSaveSize += XLenInBytes;
}
// Copy the integer registers that may have been used for passing varargs
// to the vararg save area.
for (unsigned I = Idx; I < ArgRegs.size();
++I, VaArgOffset += XLenInBytes) {
const unsigned Reg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(ArgRegs[I], Reg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo::getFixedStack(MF, FI));
cast<StoreSDNode>(Store.getNode())
->getMemOperand()
->setValue((Value *)nullptr);
OutChains.push_back(Store);
}
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
}
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens for vararg functions.
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
return Chain;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const {
auto &Callee = CLI.Callee;
auto CalleeCC = CLI.CallConv;
auto IsVarArg = CLI.IsVarArg;
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
auto CallerCC = Caller.getCallingConv();
// Do not tail call opt functions with "disable-tail-calls" attribute.
if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
// Exception-handling functions need a special set of instructions to
// indicate a return to the hardware. Tail-calling another function would
// probably break this.
// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
// should be expanded as new function attributes are introduced.
if (Caller.hasFnAttribute("interrupt"))
return false;
// Do not tail call opt functions with varargs.
if (IsVarArg)
return false;
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
return false;
// Do not tail call opt if any parameters need to be passed indirectly.
// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
// passed indirectly. So the address of the value will be passed in a
// register, or if not available, then the address is put on the stack. In
// order to pass indirectly, space on the stack often needs to be allocated
// in order to store the value. In this case the CCInfo.getNextStackOffset()
// != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
// are passed CCValAssign::Indirect.
for (auto &VA : ArgLocs)
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
// Do not tail call opt if either caller or callee uses struct return
// semantics.
auto IsCallerStructRet = Caller.hasStructRetAttr();
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
if (IsCallerStructRet || IsCalleeStructRet)
return false;
// Externally-defined functions with weak linkage should not be
// tail-called. The behaviour of branch instructions in this situation (as
// used for tail calls) is implementation-defined, so we cannot rely on the
// linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
if (GV->hasExternalWeakLinkage())
return false;
}
// The callee has to preserve all registers the caller needs to preserve.
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible
// but less efficient and uglier in LowerCall.
for (auto &Arg : Outs)
if (Arg.Flags.isByVal())
return false;
return true;
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT XLenVT = Subtarget.getXLenVT();
MachineFunction &MF = DAG.getMachineFunction();
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
// Check if it's really possible to do a tail call.
if (IsTailCall)
IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
ArgLocs);
if (IsTailCall)
++NumTailCalls;
else if (CLI.CS && CLI.CS.isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Create local copies for byval args
SmallVector<SDValue, 8> ByValArgs;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (!Flags.isByVal())
continue;
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
unsigned Align = Flags.getByValAlign();
int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
/*IsVolatile=*/false,
/*AlwaysInline=*/false,
IsTailCall, MachinePointerInfo(),
MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Handle passing f64 on RV32D with a soft float ABI as a special case.
bool IsF64OnRV32DSoftABI =
VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
SDValue SplitF64 = DAG.getNode(
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
unsigned RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
if (RegLo == RISCV::X17) {
// Second half of f64 is passed on the stack.
// Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
unsigned RegHigh = RegLo + 1;
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
}
// IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
// as any other MemLoc.
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
unsigned ArgIndex = Outs[i].OrigArgIndex;
assert(Outs[i].PartOffset == 0);
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
++i;
}
ArgValue = SpillSlot;
} else {
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
}
// Use local copy if it is a byval arg.
if (Flags.isByVal())
ArgValue = ByValArgs[j++];
if (VA.isRegLoc()) {
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
assert(!IsTailCall && "Tail call not allowed if stack is used "
"for passing parameters");
// Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
SDValue Address =
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
}
}
// Join the stores, which are independent of one another.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
SDValue Glue;
// Build a sequence of copy-to-reg nodes, chained and glued together.
for (auto &Reg : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
Glue = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
// split it and then direct call can be matched by PseudoCALL.
if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
}
// The first call operand is the chain and the second is the target address.
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are
// known live into the call.
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
if (!IsTailCall) {
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
}
// Glue the call to the argument copies, if any.
if (Glue.getNode())
Ops.push_back(Glue);
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
}
Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getConstant(NumBytes, DL, PtrVT, true),
DAG.getConstant(0, DL, PtrVT, true),
Glue, DL);
Glue = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
// Copy all of the result registers out of their specified physreg.
for (auto &VA : RVLocs) {
// Copy the value out
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
// Glue the RetValue to the end of the call sequence
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
SDValue RetValue2 =
DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
RetValue2);
}
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
InVals.push_back(RetValue);
}
return Chain;
}
bool RISCVTargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
return false;
}
return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
// Stores the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
nullptr);
SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
// Handle returning f64 on RV32D with a soft float ABI.
assert(VA.isRegLoc() && "Expected return via registers");
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
unsigned RegLo = VA.getLocReg();
unsigned RegHi = RegLo + 1;
Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
Val = convertValVTToLocVT(DAG, Val, VA, DL);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together.
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
}
RetOps[0] = Chain; // Update chain.
// Add the glue node if we have it.
if (Glue.getNode()) {
RetOps.push_back(Glue);
}
// Interrupt service routines use different return instructions.
const Function &Func = DAG.getMachineFunction().getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.getReturnType()->isVoidTy())
report_fatal_error(
"Functions with the interrupt attribute must have void return type!");
MachineFunction &MF = DAG.getMachineFunction();
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
unsigned RetOpc;
if (Kind == "user")
RetOpc = RISCVISD::URET_FLAG;
else if (Kind == "supervisor")
RetOpc = RISCVISD::SRET_FLAG;
else
RetOpc = RISCVISD::MRET_FLAG;
return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
break;
case RISCVISD::RET_FLAG:
return "RISCVISD::RET_FLAG";
case RISCVISD::URET_FLAG:
return "RISCVISD::URET_FLAG";
case RISCVISD::SRET_FLAG:
return "RISCVISD::SRET_FLAG";
case RISCVISD::MRET_FLAG:
return "RISCVISD::MRET_FLAG";
case RISCVISD::CALL:
return "RISCVISD::CALL";
case RISCVISD::SELECT_CC:
return "RISCVISD::SELECT_CC";
case RISCVISD::BuildPairF64:
return "RISCVISD::BuildPairF64";
case RISCVISD::SplitF64:
return "RISCVISD::SplitF64";
case RISCVISD::TAIL:
return "RISCVISD::TAIL";
case RISCVISD::SLLW:
return "RISCVISD::SLLW";
case RISCVISD::SRAW:
return "RISCVISD::SRAW";
case RISCVISD::SRLW:
return "RISCVISD::SRLW";
}
return nullptr;
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to a
// RISCV register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, &RISCV::GPRRegClass);
default:
break;
}
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
return Builder.CreateFence(Ord);
if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Release);
return nullptr;
}
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
// point operations can't be used in an lr/sc sequence without breaking the
// forward-progress guarantee.
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
if (XLen == 32) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
case AtomicRMWInst::Add:
return Intrinsic::riscv_masked_atomicrmw_add_i32;
case AtomicRMWInst::Sub:
return Intrinsic::riscv_masked_atomicrmw_sub_i32;
case AtomicRMWInst::Nand:
return Intrinsic::riscv_masked_atomicrmw_nand_i32;
case AtomicRMWInst::Max:
return Intrinsic::riscv_masked_atomicrmw_max_i32;
case AtomicRMWInst::Min:
return Intrinsic::riscv_masked_atomicrmw_min_i32;
case AtomicRMWInst::UMax:
return Intrinsic::riscv_masked_atomicrmw_umax_i32;
case AtomicRMWInst::UMin:
return Intrinsic::riscv_masked_atomicrmw_umin_i32;
}
}
if (XLen == 64) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
case AtomicRMWInst::Add:
return Intrinsic::riscv_masked_atomicrmw_add_i64;
case AtomicRMWInst::Sub:
return Intrinsic::riscv_masked_atomicrmw_sub_i64;
case AtomicRMWInst::Nand:
return Intrinsic::riscv_masked_atomicrmw_nand_i64;
case AtomicRMWInst::Max:
return Intrinsic::riscv_masked_atomicrmw_max_i64;
case AtomicRMWInst::Min:
return Intrinsic::riscv_masked_atomicrmw_min_i64;
case AtomicRMWInst::UMax:
return Intrinsic::riscv_masked_atomicrmw_umax_i64;
case AtomicRMWInst::UMin:
return Intrinsic::riscv_masked_atomicrmw_umin_i64;
}
}
llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
unsigned XLen = Subtarget.getXLen();
Value *Ordering =
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
if (XLen == 64) {
Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
}
Value *Result;
// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
// bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
// is the number of bits to left+right shift the value in order to
// sign-extend.
if (AI->getOperation() == AtomicRMWInst::Min ||
AI->getOperation() == AtomicRMWInst::Max) {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
Value *SextShamt =
Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
Result = Builder.CreateCall(LrwOpScwLoop,
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
} else {
Result =
Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}
if (XLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *CI) const {
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
unsigned XLen = Subtarget.getXLen();
Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
if (XLen == 64) {
CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
}
Type *Tys[] = {AlignedAddr->getType()};
Function *MaskedCmpXchg =
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
Value *Result = Builder.CreateCall(
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
if (XLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}