diff options
Diffstat (limited to 'lib/CodeGen/CGBuiltin.cpp')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 1492 |
1 files changed, 1088 insertions, 404 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a718f2f19a..048103275d 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1,9 +1,8 @@ //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -27,7 +27,6 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -46,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { + ConstantInt *Byte; + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + Byte = CGF.Builder.getInt8(0x00); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); + Byte = llvm::dyn_cast<llvm::ConstantInt>( + initializationPatternFor(CGF.CGM, Int8)); + break; + } + } + CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -300,6 +318,34 @@ static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } +// Build a plain volatile load. +static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +// Build a plain volatile store. +static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + Value *Value = CGF.EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); + Store->setVolatile(true); + return Store; +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -307,7 +353,7 @@ static Value *emitUnaryBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); } @@ -318,7 +364,7 @@ static Value *emitBinaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } @@ -330,7 +376,7 @@ static Value *emitTernaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -341,13 +387,13 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, {Src0, Src1}); } /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; @@ -408,7 +454,7 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, "Arguments must be the same type. (Did you forget to make sure both " "arguments have the same integer width?)"); - llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); + Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); Carry = CGF.Builder.CreateExtractValue(Tmp, 1); return CGF.Builder.CreateExtractValue(Tmp, 0); @@ -419,7 +465,7 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, int low, int high) { llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::Instruction *Call = CGF.Builder.CreateCall(F); Call->setMetadata(llvm::LLVMContext::MD_range, RNode); return Call; @@ -496,10 +542,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, + bool IsDynamic) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType, EmittedE); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -515,7 +562,7 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, bool IsDynamic) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -545,13 +592,15 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + Function *F = + CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); // For GCC compatibility, __builtin_object_size treat NULL as unknown size. Value *NullIsUnknown = Builder.getTrue(); - return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); + Value *Dynamic = Builder.getInt1(IsDynamic); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); } namespace { @@ -793,16 +842,16 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::ReturnsTwice); - llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); llvm::Value *Args[] = {Buf, Arg1}; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); + CB->setAttributes(ReturnsTwiceAttr); + return RValue::get(CB); } // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, @@ -876,7 +925,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); if (BuiltinID == MSVCIntrin::_BitScanForward) { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Builder.CreateStore(ZeroCount, IndexAddress, false); @@ -884,7 +933,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); @@ -996,6 +1045,9 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Asm = "udf #251"; Constraints = "{r0}"; break; + case llvm::Triple::aarch64: + Asm = "brk #0xF003"; + Constraints = "{w0}"; } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = @@ -1003,9 +1055,9 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CI->setAttributes(NoReturnAttr); + return CI; } } llvm_unreachable("Incorrect MSVC intrinsic!"); @@ -1106,6 +1158,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + Fn->setDoesNotThrow(); // Attach 'noinline' at -Oz. if (CGM.getCodeGenOpts().OptimizeSize == 2) @@ -1330,8 +1383,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, } static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, - Value *&RecordPtr, CharUnits Align, Value *Func, - int Lvl) { + Value *&RecordPtr, CharUnits Align, + llvm::FunctionCallee Func, int Lvl) { const auto *RT = RType->getAs<RecordType>(); ASTContext &Context = CGF.getContext(); RecordDecl *RD = RT->getDecl()->getDefinition(); @@ -1466,7 +1519,7 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGM.getIntrinsic(IID, Ty); + Function *F = CGM.getIntrinsic(IID, Ty); return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } @@ -1735,6 +1788,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_dump_struct: { + llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy); + llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get( + LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true); + Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts()); CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment(); @@ -1742,7 +1799,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QualType Arg0Type = Arg0->getType()->getPointeeType(); Value *RecordPtr = EmitScalarExpr(Arg0); - Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0); + Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, + {LLVMFuncType, Func}, 0); return RValue::get(Res); } @@ -1763,7 +1821,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Zero = llvm::Constant::getNullValue(ArgType); @@ -1783,7 +1841,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1800,7 +1858,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); @@ -1817,7 +1875,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = @@ -1838,7 +1896,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = Builder.CreateCall(F, ArgValue); @@ -1854,7 +1912,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); @@ -1872,7 +1930,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, ArgValue); @@ -1898,7 +1956,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); + Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *Result = Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); @@ -1913,7 +1971,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + EmitAlignmentAssumption(PtrValue, Ptr, + /*The expr loc is sufficient.*/ SourceLocation(), Alignment, OffsetValue); return RValue::get(PtrValue); } @@ -1923,7 +1982,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); Value *ArgValue = EmitScalarExpr(E->getArg(0)); - Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); + Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } case Builtin::BI__builtin_bswap16: @@ -1968,17 +2027,34 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); - if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) - // We can only reason about scalar types. + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && + !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get(ConstantInt::get(ResultType, 0)); + + if (Arg->HasSideEffects(getContext())) + // The argument is unevaluated, so be conservative if it might have + // side-effects. return RValue::get(ConstantInt::get(ResultType, 0)); Value *ArgValue = EmitScalarExpr(Arg); - Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + if (ArgType->isObjCObjectPointerType()) { + // Convert Objective-C objects to id because we cannot distinguish between + // LLVM types for Obj-C classes as they are opaque. + ArgType = CGM.getContext().getObjCIdType(); + ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); + } + Function *F = + CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); return RValue::get(Result); } + case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1986,8 +2062,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. + bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, - /*EmittedE=*/nullptr)); + /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -1997,17 +2074,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { - Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); + Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: @@ -2029,7 +2106,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Base = EmitScalarExpr(E->getArg(0)); Value *Exponent = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = Base->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); return RValue::get(Builder.CreateCall(F, {Base, Exponent})); } @@ -2130,6 +2207,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_flt_rounds: { + Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } + case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -2200,6 +2288,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, .getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); + initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); return RValue::get(AI); } @@ -2212,6 +2301,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); + initializeAlloca(*this, AI, Size, AlignmentInBytes); return RValue::get(AI); } @@ -2392,24 +2482,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // this instead of hard-coding 0, which is correct for most targets. int32_t Offset = 0; - Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); + Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); return RValue::get(Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); + Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2445,9 +2535,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); - Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 - ? Intrinsic::eh_return_i32 - : Intrinsic::eh_return_i64); + Function *F = + CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 + : Intrinsic::eh_return_i64); Builder.CreateCall(F, {Int, Ptr}); Builder.CreateUnreachable(); @@ -2457,7 +2547,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { - Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); + Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_extend_pointer: { @@ -2498,12 +2588,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); - Address StackSaveSlot = - Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); + Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. - Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); + Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); Buf = Builder.CreateBitCast(Buf, Int8PtrTy); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } @@ -2719,7 +2808,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } @@ -2959,14 +3048,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Build and MDTuple of MDStrings and emit the intrinsic call. - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); return RValue::getIgnored(); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, AnnVal->getType()); // Get the annotation string, go through casts. Sema requires this to be a @@ -3311,6 +3401,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_interlockedbittestandreset_nf: return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); + // These builtins exist to emit regular volatile loads and stores not + // affected by the -fms-volatile setting. + case Builtin::BI__iso_volatile_load8: + case Builtin::BI__iso_volatile_load16: + case Builtin::BI__iso_volatile_load32: + case Builtin::BI__iso_volatile_load64: + return RValue::get(EmitISOVolatileLoad(*this, E)); + case Builtin::BI__iso_volatile_store8: + case Builtin::BI__iso_volatile_store16: + case Builtin::BI__iso_volatile_store32: + case Builtin::BI__iso_volatile_store64: + return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); @@ -3348,7 +3451,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto & Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); return RValue::get(Builder.CreateCall(F)); } @@ -3666,21 +3769,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - llvm::Type *EventPtrTy = EventTy->getPointerTo( + llvm::PointerType *EventPtrTy = EventTy->getPointerTo( CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); - llvm::Value *EventList = - E->getArg(4)->getType()->isArrayType() - ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() - : EmitScalarExpr(E->getArg(4)); - llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); - // Convert to generic address space. - EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = ClkEvent->getType()->isIntegerTy() - ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) - : Builder.CreatePointerCast(ClkEvent, EventPtrTy); + + // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments + // to be a null pointer constant (including `0` literal), we can take it + // into account and emit null pointer directly. + llvm::Value *EventWaitList = nullptr; + if (E->getArg(4)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventWaitList = E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + // Convert to generic address space. + EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); + } + llvm::Value *EventRet = nullptr; + if (E->getArg(5)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventRet = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventRet = + Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); + } + auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3692,8 +3809,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QueueTy, Int32Ty, RangeTy, Int32Ty, EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Kernel, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, + NumEvents, EventWaitList, EventRet, + Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -5065,6 +5183,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. + if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) @@ -5262,7 +5387,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); @@ -5731,7 +5856,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, && "Can't fit 64-bit value in 32-bit register"); if (IsRead) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) @@ -5745,7 +5870,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, return Call; } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); if (MixedTypes) { // Extend 32 bit write value to 64 bit to pass to write. @@ -5798,34 +5923,6 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { return true; } -Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); - Load->setVolatile(true); - return Load; -} - -Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Value = EmitScalarExpr(E->getArg(1)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::StoreInst *Store = - Builder.CreateAlignedStore(Value, Ptr, - StoreSize); - Store->setVolatile(true); - return Store; -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5866,7 +5963,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6065,19 +6162,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); } - switch (BuiltinID) { - case ARM::BI__iso_volatile_load8: - case ARM::BI__iso_volatile_load16: - case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case ARM::BI__iso_volatile_store8: - case ARM::BI__iso_volatile_store16: - case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); - } - if (BuiltinID == ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); @@ -6818,7 +6902,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify // PLDL3STRM or PLDL2STRM. - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6956,7 +7040,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Value *F = + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } @@ -7002,6 +7086,84 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1}); } + // Memory Tagging Extensions (MTE) Intrinsics + Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + case AArch64::BI__builtin_arm_irg: + MTEIntrinsicID = Intrinsic::aarch64_irg; break; + case AArch64::BI__builtin_arm_addg: + MTEIntrinsicID = Intrinsic::aarch64_addg; break; + case AArch64::BI__builtin_arm_gmi: + MTEIntrinsicID = Intrinsic::aarch64_gmi; break; + case AArch64::BI__builtin_arm_ldg: + MTEIntrinsicID = Intrinsic::aarch64_ldg; break; + case AArch64::BI__builtin_arm_stg: + MTEIntrinsicID = Intrinsic::aarch64_stg; break; + case AArch64::BI__builtin_arm_subp: + MTEIntrinsicID = Intrinsic::aarch64_subp; break; + } + + if (MTEIntrinsicID != Intrinsic::not_intrinsic) { + llvm::Type *T = ConvertType(E->getType()); + + if (MTEIntrinsicID == Intrinsic::aarch64_irg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *Mask = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + Mask = Builder.CreateZExt(Mask, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_addg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *TagOffset = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); + + ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); + } + // Although it is possible to supply a different return + // address (first arg) to this intrinsic, for now we set + // return address same as input address. + if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + return Builder.CreatePointerCast(RV, T); + } + // Although it is possible to supply a different tag (to set) + // to this intrinsic (as first arg), for now we supply + // the tag that is in input address arg (common use case). + if (MTEIntrinsicID == Intrinsic::aarch64_stg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + } + if (MTEIntrinsicID == Intrinsic::aarch64_subp) { + Value *PointerA = EmitScalarExpr(E->getArg(0)); + Value *PointerB = EmitScalarExpr(E->getArg(1)); + PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); + PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); + } + } + if (BuiltinID == AArch64::BI__builtin_arm_rsr || BuiltinID == AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_rsrp || @@ -7052,25 +7214,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; - llvm::Type *ValueType = Int32Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == AArch64::BI_ReadStatusReg) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - llvm::Value *Call = Builder.CreateCall(F, Metadata); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - return Builder.CreateTrunc(Call, ValueType); + return Builder.CreateCall(F, Metadata); } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); - ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { - llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + + if (BuiltinID == AArch64::BI__builtin_sponentry) { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); return Builder.CreateCall(F); } @@ -7608,13 +7772,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); case NEON::BI__builtin_neon_vfmah_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vfmsh_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. @@ -7775,6 +7939,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } + case NEON::BI__builtin_neon_vduph_lane_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + } + case NEON::BI__builtin_neon_vduph_laneq_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + } } llvm::VectorType *VTy = GetNeonType(this, Type); @@ -7845,11 +8017,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); return Builder.CreateBitCast(Result, Ty); } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7863,7 +8035,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7879,7 +8051,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } @@ -8892,16 +9064,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case AArch64::BI__iso_volatile_load8: - case AArch64::BI__iso_volatile_load16: - case AArch64::BI__iso_volatile_load32: - case AArch64::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case AArch64::BI__iso_volatile_store8: - case AArch64::BI__iso_volatile_store16: - case AArch64::BI__iso_volatile_store32: - case AArch64::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); case AArch64::BI_BitScanForward: case AArch64::BI_BitScanForward64: return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); @@ -9139,6 +9301,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); } +static Value *EmitX86CompressExpand(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, + bool IsCompress) { + llvm::Type *ResultTy = Ops[1]->getType(); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress + : Intrinsic::x86_avx512_mask_expand; + llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); + return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); +} + static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *ResultTy = Ops[1]->getType(); @@ -9184,10 +9360,50 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, } unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl; - Value *F = CGF.CGM.getIntrinsic(IID, Ty); + Function *F = CGF.CGM.getIntrinsic(IID, Ty); return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } +static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + bool IsSigned) { + Value *Op0 = Ops[0]; + Value *Op1 = Ops[1]; + llvm::Type *Ty = Op0->getType(); + uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + + CmpInst::Predicate Pred; + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; + break; + case 0x5: + Pred = ICmpInst::ICMP_NE; + break; + case 0x6: + return llvm::Constant::getNullValue(Ty); // FALSE + case 0x7: + return llvm::Constant::getAllOnesValue(Ty); // TRUE + default: + llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); + } + + Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); + Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); + return Res; +} + static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -9278,6 +9494,25 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); } +static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, bool IsSigned) { + unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); + llvm::Type *Ty = Ops[1]->getType(); + + Value *Res; + if (Rnd != 4) { + Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round + : Intrinsic::x86_avx512_uitofp_round; + Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); + Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); + } else { + Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty) + : CGF.Builder.CreateUIToFP(Ops[0], Ty); + } + + return EmitX86Select(CGF, Ops[2], Res, Ops[1]); +} + static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { llvm::Type *Ty = Ops[0]->getType(); @@ -9650,10 +9885,11 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); - cast<llvm::GlobalValue>(Func)->setDSOLocal(true); - cast<llvm::GlobalValue>(Func)->setDLLStorageClass( - llvm::GlobalValue::DefaultStorageClass); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true); + cast<llvm::GlobalValue>(Func.getCallee()) + ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } @@ -9722,7 +9958,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } case X86::BI_mm_clflush: { @@ -9753,13 +9989,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: case X86::BI__builtin_ia32_lzcnt_u64: { - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: case X86::BI__builtin_ia32_tzcnt_u64: { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_undef128: @@ -9833,7 +10069,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_xsavec: case X86::BI__builtin_ia32_xsavec64: case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: { + case X86::BI__builtin_ia32_xsaves64: + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: { Intrinsic::ID ID; #define INTRINSIC_X86_XSAVE_ID(NAME) \ case X86::BI__builtin_ia32_##NAME: \ @@ -9853,6 +10091,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, INTRINSIC_X86_XSAVE_ID(xsavec64); INTRINSIC_X86_XSAVE_ID(xsaves); INTRINSIC_X86_XSAVE_ID(xsaves64); + INTRINSIC_X86_XSAVE_ID(xsetbv); + case X86::BI_xsetbv: + ID = Intrinsic::x86_xsetbv; + break; } #undef INTRINSIC_X86_XSAVE_ID Value *Mhi = Builder.CreateTrunc( @@ -9862,6 +10104,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_xgetbv: + case X86::BI_xgetbv: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: @@ -9930,6 +10175,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtq2mask512: return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false); + case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddss3_mask: @@ -10073,6 +10327,262 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); + + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); + + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + IID = Intrinsic::x86_avx512_mask_gather3div2_df; + break; + case X86::BI__builtin_ia32_gather3div2di: + IID = Intrinsic::x86_avx512_mask_gather3div2_di; + break; + case X86::BI__builtin_ia32_gather3div4df: + IID = Intrinsic::x86_avx512_mask_gather3div4_df; + break; + case X86::BI__builtin_ia32_gather3div4di: + IID = Intrinsic::x86_avx512_mask_gather3div4_di; + break; + case X86::BI__builtin_ia32_gather3div4sf: + IID = Intrinsic::x86_avx512_mask_gather3div4_sf; + break; + case X86::BI__builtin_ia32_gather3div4si: + IID = Intrinsic::x86_avx512_mask_gather3div4_si; + break; + case X86::BI__builtin_ia32_gather3div8sf: + IID = Intrinsic::x86_avx512_mask_gather3div8_sf; + break; + case X86::BI__builtin_ia32_gather3div8si: + IID = Intrinsic::x86_avx512_mask_gather3div8_si; + break; + case X86::BI__builtin_ia32_gather3siv2df: + IID = Intrinsic::x86_avx512_mask_gather3siv2_df; + break; + case X86::BI__builtin_ia32_gather3siv2di: + IID = Intrinsic::x86_avx512_mask_gather3siv2_di; + break; + case X86::BI__builtin_ia32_gather3siv4df: + IID = Intrinsic::x86_avx512_mask_gather3siv4_df; + break; + case X86::BI__builtin_ia32_gather3siv4di: + IID = Intrinsic::x86_avx512_mask_gather3siv4_di; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; + break; + case X86::BI__builtin_ia32_gather3siv4si: + IID = Intrinsic::x86_avx512_mask_gather3siv4_si; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; + break; + case X86::BI__builtin_ia32_gather3siv8si: + IID = Intrinsic::x86_avx512_mask_gather3siv8_si; + break; + case X86::BI__builtin_ia32_gathersiv8df: + IID = Intrinsic::x86_avx512_mask_gather_dpd_512; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_dps_512; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + IID = Intrinsic::x86_avx512_mask_gather_qpd_512; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_qps_512; + break; + case X86::BI__builtin_ia32_gathersiv8di: + IID = Intrinsic::x86_avx512_mask_gather_dpq_512; + break; + case X86::BI__builtin_ia32_gathersiv16si: + IID = Intrinsic::x86_avx512_mask_gather_dpi_512; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + IID = Intrinsic::x86_avx512_mask_gather_qpq_512; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + IID = Intrinsic::x86_avx512_mask_gather_qpi_512; + break; + } + + unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), + Ops[2]->getType()->getVectorNumElements()); + Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_dps_512; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_qps_512; + break; + case X86::BI__builtin_ia32_scattersiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; + break; + case X86::BI__builtin_ia32_scattersiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; + break; + case X86::BI__builtin_ia32_scattersiv2df: + IID = Intrinsic::x86_avx512_mask_scattersiv2_df; + break; + case X86::BI__builtin_ia32_scattersiv2di: + IID = Intrinsic::x86_avx512_mask_scattersiv2_di; + break; + case X86::BI__builtin_ia32_scattersiv4df: + IID = Intrinsic::x86_avx512_mask_scattersiv4_df; + break; + case X86::BI__builtin_ia32_scattersiv4di: + IID = Intrinsic::x86_avx512_mask_scattersiv4_di; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; + break; + case X86::BI__builtin_ia32_scattersiv4si: + IID = Intrinsic::x86_avx512_mask_scattersiv4_si; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; + break; + case X86::BI__builtin_ia32_scattersiv8si: + IID = Intrinsic::x86_avx512_mask_scattersiv8_si; + break; + } + + unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), + Ops[3]->getType()->getVectorNumElements()); + Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -10693,6 +11203,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return EmitX86vpcom(*this, Ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: @@ -11336,6 +11856,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); +// AVX512 bf16 intrinsics + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { + Ops[2] = getMaskVecValue(*this, Ops[2], + Ops[0]->getType()->getVectorNumElements()); + Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); @@ -11386,9 +11914,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); // return Builder.CreateCall(F, Ops); llvm::Type *Int128Ty = Builder.getInt128Ty(); - Value *Val = Builder.CreateOr( - Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64), - Builder.CreateZExt(Ops[0], Int128Ty)); + Value *HighPart128 = + Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); + Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); + Value *Val = Builder.CreateOr(HighPart128, LowPart128); Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), llvm::ConstantInt::get(Int128Ty, 0x3f)); Value *Res; @@ -11465,7 +11994,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_AddressOfReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } case X86::BI__stosb: { @@ -11484,9 +12013,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA); + CI->setAttributes(NoReturnAttr); + return CI; } case X86::BI__readfsbyte: case X86::BI__readfsword: @@ -12001,7 +12530,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, + llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -12023,7 +12552,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, + llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); @@ -12039,7 +12568,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); - Value *F = + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } @@ -12080,13 +12609,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt32Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_frexp_exph: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt16Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } @@ -12111,6 +12640,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); @@ -12160,7 +12697,7 @@ static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); CGF.Builder.CreateStore(CC, CCPtr); @@ -12173,30 +12710,30 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_tbegin: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbegin_nofloat: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbeginc: { Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tabort: { Value *Data = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); } case SystemZ::BI__builtin_non_tx_store: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Data = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); + Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); return Builder.CreateCall(F, {Data, Address}); } @@ -12488,8 +13025,252 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, } } -Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +namespace { +// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. +struct NVPTXMmaLdstInfo { + unsigned NumResults; // Number of elements to load/store + // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported. + unsigned IID_col; + unsigned IID_row; +}; + +#define MMA_INTR(geom_op_type, layout) \ + Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride +#define MMA_LDST(n, geom_op_type) \ + { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } + +static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { + switch (BuiltinID) { + // FP MMA loads + case NVPTX::BI__hmma_m16n16k16_ld_a: + return MMA_LDST(8, m16n16k16_load_a_f16); + case NVPTX::BI__hmma_m16n16k16_ld_b: + return MMA_LDST(8, m16n16k16_load_b_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + return MMA_LDST(4, m16n16k16_load_c_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + return MMA_LDST(8, m16n16k16_load_c_f32); + case NVPTX::BI__hmma_m32n8k16_ld_a: + return MMA_LDST(8, m32n8k16_load_a_f16); + case NVPTX::BI__hmma_m32n8k16_ld_b: + return MMA_LDST(8, m32n8k16_load_b_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + return MMA_LDST(4, m32n8k16_load_c_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + return MMA_LDST(8, m32n8k16_load_c_f32); + case NVPTX::BI__hmma_m8n32k16_ld_a: + return MMA_LDST(8, m8n32k16_load_a_f16); + case NVPTX::BI__hmma_m8n32k16_ld_b: + return MMA_LDST(8, m8n32k16_load_b_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + return MMA_LDST(4, m8n32k16_load_c_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + return MMA_LDST(8, m8n32k16_load_c_f32); + + // Integer MMA loads + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + return MMA_LDST(2, m16n16k16_load_a_s8); + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + return MMA_LDST(2, m16n16k16_load_a_u8); + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + return MMA_LDST(2, m16n16k16_load_b_s8); + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + return MMA_LDST(2, m16n16k16_load_b_u8); + case NVPTX::BI__imma_m16n16k16_ld_c: + return MMA_LDST(8, m16n16k16_load_c_s32); + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + return MMA_LDST(4, m32n8k16_load_a_s8); + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + return MMA_LDST(4, m32n8k16_load_a_u8); + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + return MMA_LDST(1, m32n8k16_load_b_s8); + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + return MMA_LDST(1, m32n8k16_load_b_u8); + case NVPTX::BI__imma_m32n8k16_ld_c: + return MMA_LDST(8, m32n8k16_load_c_s32); + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + return MMA_LDST(1, m8n32k16_load_a_s8); + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + return MMA_LDST(1, m8n32k16_load_a_u8); + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + return MMA_LDST(4, m8n32k16_load_b_s8); + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + return MMA_LDST(4, m8n32k16_load_b_u8); + case NVPTX::BI__imma_m8n32k16_ld_c: + return MMA_LDST(8, m8n32k16_load_c_s32); + + // Sub-integer MMA loads. + // Only row/col layout is supported by A/B fragments. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_c: + return MMA_LDST(2, m8n8k32_load_c_s32); + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; + case NVPTX::BI__bmma_m8n8k128_ld_c: + return MMA_LDST(2, m8n8k128_load_c_s32); + + // NOTE: We need to follow inconsitent naming scheme used by NVCC. Unlike + // PTX and LLVM IR where stores always use fragment D, NVCC builtins always + // use fragment C for both loads and stores. + // FP MMA stores. + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + return MMA_LDST(4, m16n16k16_store_d_f16); + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + return MMA_LDST(8, m16n16k16_store_d_f32); + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + return MMA_LDST(4, m32n8k16_store_d_f16); + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + return MMA_LDST(8, m32n8k16_store_d_f32); + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + return MMA_LDST(4, m8n32k16_store_d_f16); + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + return MMA_LDST(8, m8n32k16_store_d_f32); + + // Integer and sub-integer MMA stores. + // Another naming quirk. Unlike other MMA builtins that use PTX types in the + // name, integer loads/stores use LLVM's i32. + case NVPTX::BI__imma_m16n16k16_st_c_i32: + return MMA_LDST(8, m16n16k16_store_d_s32); + case NVPTX::BI__imma_m32n8k16_st_c_i32: + return MMA_LDST(8, m32n8k16_store_d_s32); + case NVPTX::BI__imma_m8n32k16_st_c_i32: + return MMA_LDST(8, m8n32k16_store_d_s32); + case NVPTX::BI__imma_m8n8k32_st_c_i32: + return MMA_LDST(2, m8n8k32_store_d_s32); + case NVPTX::BI__bmma_m8n8k128_st_c_i32: + return MMA_LDST(2, m8n8k128_store_d_s32); + + default: + llvm_unreachable("Unknown MMA builtin"); + } +} +#undef MMA_LDST +#undef MMA_INTR + + +struct NVPTXMmaInfo { + unsigned NumEltsA; + unsigned NumEltsB; + unsigned NumEltsC; + unsigned NumEltsD; + std::array<unsigned, 8> Variants; + + unsigned getMMAIntrinsic(int Layout, bool Satf) { + unsigned Index = Layout * 2 + Satf; + if (Index >= Variants.size()) + return 0; + return Variants[Index]; + } +}; + + // Returns an intrinsic that matches Layout and Satf for valid combinations of + // Layout and Satf, 0 otherwise. +static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { + // clang-format off +#define MMA_VARIANTS(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ + }} +// Sub-integer MMA only supports row.col layout. +#define MMA_VARIANTS_I4(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} +// b1 MMA does not support .satfinite. +#define MMA_VARIANTS_B1(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + 0, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} + // clang-format on + switch (BuiltinID) { + // FP MMA + // Note that 'type' argument of MMA_VARIANT uses D_C notation, while + // NumEltsN of return value are ordered as A,B,C,D. + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)}; + + // Integer MMA + case NVPTX::BI__imma_m16n16k16_mma_s8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)}; + case NVPTX::BI__imma_m16n16k16_mma_u8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)}; + case NVPTX::BI__imma_m32n8k16_mma_s8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)}; + case NVPTX::BI__imma_m32n8k16_mma_u8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)}; + case NVPTX::BI__imma_m8n32k16_mma_s8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)}; + case NVPTX::BI__imma_m8n32k16_mma_u8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)}; + + // Sub-integer MMA + case NVPTX::BI__imma_m8n8k32_mma_s4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)}; + case NVPTX::BI__imma_m8n8k32_mma_u4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)}; + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)}; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS +#undef MMA_VARIANTS_I4 +#undef MMA_VARIANTS_B1 +} + +} // namespace + +Value * +CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); clang::CharUnits Align = @@ -12569,7 +13350,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(1)); // atomicrmw only deals with integer arguments so we need to use // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. - Value *FnALAF32 = + Function *FnALAF32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); return Builder.CreateCall(FnALAF32, {Ptr, Val}); } @@ -12579,7 +13360,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *Val = EmitScalarExpr(E->getArg(1)); // atomicrmw only deals with integer arguments, so we need to use // LLVM's nvvm_atomic_load_add_f64 intrinsic. - Value *FnALAF64 = + Function *FnALAF64 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); return Builder.CreateCall(FnALAF64, {Ptr, Val}); } @@ -12587,7 +13368,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALI32 = + Function *FnALI32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); return Builder.CreateCall(FnALI32, {Ptr, Val}); } @@ -12595,7 +13376,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_dec_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALD32 = + Function *FnALD32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); return Builder.CreateCall(FnALD32, {Ptr, Val}); } @@ -12752,6 +13533,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(Pred, PredOutPtr); return Builder.CreateExtractValue(ResultPair, 0); } + + // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: @@ -12763,7 +13546,33 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_ld_a: case NVPTX::BI__hmma_m8n32k16_ld_b: case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + // Integer MMA loads. + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + case NVPTX::BI__imma_m16n16k16_ld_c: + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + case NVPTX::BI__imma_m32n8k16_ld_c: + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + case NVPTX::BI__imma_m8n32k16_ld_c: + // Sub-integer MMA loads. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + case NVPTX::BI__imma_m8n8k32_ld_c: + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + case NVPTX::BI__bmma_m8n8k128_ld_c: + { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12771,82 +13580,28 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; - NumResults = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Value *Result = Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. - for (unsigned i = 0; i < NumResults; ++i) { - Builder.CreateAlignedStore( - Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), - Dst.getElementType()), - Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), - CharUnits::fromQuantity(4)); + assert(II.NumResults); + if (II.NumResults == 1) { + Builder.CreateAlignedStore(Result, Dst.getPointer(), + CharUnits::fromQuantity(4)); + } else { + for (unsigned i = 0; i < II.NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } } return Result; } @@ -12856,7 +13611,12 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m32n8k16_st_c_f16: case NVPTX::BI__hmma_m32n8k16_st_c_f32: case NVPTX::BI__hmma_m8n32k16_st_c_f16: - case NVPTX::BI__hmma_m8n32k16_st_c_f32: { + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + case NVPTX::BI__imma_m16n16k16_st_c_i32: + case NVPTX::BI__imma_m32n8k16_st_c_i32: + case NVPTX::BI__imma_m8n32k16_st_c_i32: + case NVPTX::BI__imma_m8n8k32_st_c_i32: + case NVPTX::BI__bmma_m8n8k128_st_c_i32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12864,45 +13624,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults = 8; - // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet - // for some reason nvcc builtins use _c_. - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } - Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Function *Intrinsic = + CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); SmallVector<Value *, 10> Values = {Dst}; - for (unsigned i = 0; i < NumResults; ++i) { + for (unsigned i = 0; i < II.NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); @@ -12926,7 +13656,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_mma_f16f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + case NVPTX::BI__imma_m16n16k16_mma_s8: + case NVPTX::BI__imma_m16n16k16_mma_u8: + case NVPTX::BI__imma_m32n8k16_mma_s8: + case NVPTX::BI__imma_m32n8k16_mma_u8: + case NVPTX::BI__imma_m8n32k16_mma_s8: + case NVPTX::BI__imma_m8n32k16_mma_u8: + case NVPTX::BI__imma_m8n8k32_mma_s4: + case NVPTX::BI__imma_m8n8k32_mma_u4: + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); @@ -12938,119 +13677,40 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; - if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) + SatfArg = 0; // .b1 does not have satf argument. + else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) return nullptr; bool Satf = SatfArg.getSExtValue(); - - // clang-format off -#define MMA_VARIANTS(geom, type) {{ \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ - }} - // clang-format on - - auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { - unsigned Index = Layout * 2 + Satf; - assert(Index < 8); - return Variants[Index]; - }; - unsigned IID; - unsigned NumEltsC; - unsigned NumEltsD; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } -#undef MMA_VARIANTS + NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); + unsigned IID = MI.getMMAIntrinsic(Layout, Satf); + if (IID == 0) // Unsupported combination of Layout/Satf. + return nullptr; SmallVector<Value *, 24> Values; Function *Intrinsic = CGM.getIntrinsic(IID); - llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); // Load A - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < MI.NumEltsA; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcA.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, AType)); } // Load B - for (unsigned i = 0; i < 8; ++i) { + llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); + for (unsigned i = 0; i < MI.NumEltsB; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcB.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, BType)); } // Load C - llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); - for (unsigned i = 0; i < NumEltsC; ++i) { + llvm::Type *CType = + Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); + for (unsigned i = 0; i < MI.NumEltsC; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcC.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13059,7 +13719,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } Value *Result = Builder.CreateCall(Intrinsic, Values); llvm::Type *DType = Dst.getElementType(); - for (unsigned i = 0; i < NumEltsD; ++i) + for (unsigned i = 0; i < MI.NumEltsD; ++i) Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13077,7 +13737,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { @@ -13086,37 +13746,61 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)) }; - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } + case WebAssembly::BI__builtin_wasm_memory_init: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt MemConst; + if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + if (!MemConst.isNullValue()) + ErrorUnsupported(E, "non-zero memory index"); + Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst), + llvm::ConstantInt::get(getLLVMContext(), MemConst), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), + EmitScalarExpr(E->getArg(4))}; + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init); + return Builder.CreateCall(Callee, Args); + } + case WebAssembly::BI__builtin_wasm_data_drop: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); + return Builder.CreateCall(Callee, {Arg}); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return Builder.CreateCall(Callee, {Tag, Obj}); } - case WebAssembly::BI__builtin_wasm_rethrow: { - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + case WebAssembly::BI__builtin_wasm_rethrow_in_catch: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: @@ -13127,7 +13811,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13139,7 +13823,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13149,7 +13833,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13159,7 +13843,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13252,14 +13936,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } @@ -13289,19 +13973,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, llvm_unreachable("unexpected builtin ID"); } Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } |