[TargetLowering] SimplifyDemandedBits - add ISD::INSERT_SUBVECTOR support
llvm-svn: 358019
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ab934d1..4d5dae7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -595,6 +595,45 @@
return false;
}
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Base = Op.getOperand(0); // operand 0: vector being inserted into
+ SDValue Sub = Op.getOperand(1); // operand 1: subvector being inserted
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+
+ // If index isn't constant, assume we need the original demanded base
+ // elements and ALL the inserted subvector elements.
+ APInt BaseElts = DemandedElts;
+ APInt SubElts = APInt::getAllOnesValue(NumSubElts);
+ if (isa<ConstantSDNode>(Op.getOperand(2))) { // operand 2: insertion index
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
+ if (Idx.ule(NumElts - NumSubElts)) { // refine only when Idx + NumSubElts <= NumElts (NumElts is declared outside this hunk)
+ unsigned SubIdx = Idx.getZExtValue();
+ SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); // demanded lanes that fall inside the inserted range
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); // clear those same lanes in the Base demand mask
+ }
+ }
+
+ KnownBits KnownSub, KnownBase; // passed to the two recursive calls below, then merged into Known
+ if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ Depth + 1))
+ return true; // forward a true result from the Sub recursion unchanged
+ if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ Depth + 1))
+ return true; // forward a true result from the Base recursion unchanged
+
+ Known.Zero.setAllBits(); // start with every bit set so the &= below act as an intersection
+ Known.One.setAllBits();
+ if (!!SubElts) { // merge Sub's known bits only if SubElts is non-zero
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
+ }
+ if (!!BaseElts) { // merge Base's known bits only if BaseElts is non-zero
+ Known.One &= KnownBase.One;
+ Known.Zero &= KnownBase.Zero;
+ }
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 7a7b79c..96bbadb 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1069,10 +1069,9 @@
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kmovw %edi, %k3
-; KNL-NEXT: movb $1, %al
-; KNL-NEXT: kmovw %eax, %k4
-; KNL-NEXT: kshiftrw $5, %k0, %k5
-; KNL-NEXT: kxorw %k4, %k5, %k4
+; KNL-NEXT: kshiftrw $5, %k0, %k4
+; KNL-NEXT: kxnorw %k0, %k0, %k5
+; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k4
@@ -1091,8 +1090,7 @@
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: kmovq %rdi, %k0
-; SKX-NEXT: movb $1, %al
-; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrq $5, %k0, %k2
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
@@ -1104,8 +1102,7 @@
; AVX512BW-LABEL: test16:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovq %rdi, %k0
-; AVX512BW-NEXT: movb $1, %al
-; AVX512BW-NEXT: kmovd %eax, %k1
+; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
@@ -1125,10 +1122,9 @@
; AVX512DQ-NEXT: kmovw %ecx, %k1
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kmovw %edi, %k3
-; AVX512DQ-NEXT: movb $1, %al
-; AVX512DQ-NEXT: kmovw %eax, %k4
-; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5
-; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
+; AVX512DQ-NEXT: kshiftrw $5, %k0, %k4
+; AVX512DQ-NEXT: kxnorw %k0, %k0, %k5
+; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k0, %k0
@@ -1147,10 +1143,9 @@
; X86-LABEL: test16:
; X86: ## %bb.0:
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
-; X86-NEXT: movb $1, %al
-; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: kshiftrq $5, %k0, %k2
-; X86-NEXT: kxorq %k1, %k2, %k1
+; X86-NEXT: kshiftrq $5, %k0, %k1
+; X86-NEXT: kxnorw %k0, %k0, %k2
+; X86-NEXT: kxorq %k2, %k1, %k1
; X86-NEXT: kshiftlq $63, %k1, %k1
; X86-NEXT: kshiftrq $58, %k1, %k1
; X86-NEXT: kxorq %k1, %k0, %k0