[SelectionDAG] Add FSHL/FSHR support to computeKnownBits

This also exposes an issue in DAGCombiner::visitFunnelShift, where we were assuming the shift amount had the result type; after legalization it will have the target's shift amount type.
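
The modulo-mask trick used in that fold only holds for power-of-two bit widths, which is why it is now guarded by isPowerOf2_32. As a quick illustration (a standalone sketch, not LLVM code, using an assumed 24-bit width):

  // Standalone check: for a non-power-of-two width, "the low bits of the
  // shift amount are known zero" does not imply the effective (modulo)
  // shift is zero.
  #include <cassert>
  int main() {
    unsigned BitWidth = 24, Amt = 32;
    assert((Amt & (BitWidth - 1)) == 0); // the BitWidth-1 mask sees no set bits
    assert(Amt % BitWidth != 0);         // yet the effective shift is 8, not 0
    return 0;
  }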
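
For the computeKnownBits change itself, the idea is that with a constant shift amount every result bit comes from exactly one of the two operands, so the known-zero/known-one masks can simply be shifted and OR'd together. Below is a minimal standalone sketch of the fshl case, using plain uint32_t masks instead of LLVM's KnownBits/APInt; the struct and function names are illustrative only:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  struct Known {
    uint32_t Zero = 0; // bits known to be 0
    uint32_t One = 0;  // bits known to be 1
  };

  // fshl(X, Y, Z) == (X << (Z % 32)) | (Y >> (32 - (Z % 32)))
  Known knownBitsFSHL(Known X, Known Y, unsigned Z) {
    unsigned Amt = Z % 32;
    if (Amt == 0)
      return X; // a zero shift returns the first operand unchanged
    Known K;
    // Each result bit comes from either X (shifted left) or Y (shifted
    // right), never both, so OR'ing the shifted masks is exact.
    K.Zero = (X.Zero << Amt) | (Y.Zero >> (32 - Amt));
    K.One  = (X.One  << Amt) | (Y.One  >> (32 - Amt));
    return K;
  }

  int main() {
    // Mirrors the knownbits_fshl test below: fshl(%a0, -1, 5) & 3.
    Known A;                          // %a0: nothing known
    Known AllOnes = {0, 0xffffffffu}; // -1: every bit known one
    Known R = knownBitsFSHL(A, AllOnes, 5);
    // The low 5 bits of the result come from the top of -1, so they are
    // known one, which is what lets (R & 3) fold to the constant 3.
    assert((R.One & 3u) == 3u);
    printf("low two bits: %u\n", R.One & 3u);
    return 0;
  }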
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cad130e..93a1ab1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6966,8 +6966,10 @@
 
   // fold (fshl N0, N1, 0) -> N0
   // fold (fshr N0, N1, 0) -> N1
-  if (DAG.MaskedValueIsZero(N2, APInt::getAllOnesValue(BitWidth)))
-    return IsFSHL ? N0 : N1;
+  if (isPowerOf2_32(BitWidth))
+    if (DAG.MaskedValueIsZero(
+            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
+      return IsFSHL ? N0 : N1;
 
   // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ceb8689..6241af1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2679,6 +2679,39 @@
       Known.One.ashrInPlace(Shift);
     }
     break;
+  case ISD::FSHL:
+  case ISD::FSHR:
+    if (ConstantSDNode *C =
+            isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+      unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+      // For fshl, 0-shift returns the 1st arg.
+      // For fshr, 0-shift returns the 2nd arg.
+      if (Amt == 0) {
+        Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1),
+                                 DemandedElts, Depth + 1);
+        break;
+      }
+
+      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
+      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
+      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+      Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      if (Opcode == ISD::FSHL) {
+        Known.One <<= Amt;
+        Known.Zero <<= Amt;
+        Known2.One.lshrInPlace(BitWidth - Amt);
+        Known2.Zero.lshrInPlace(BitWidth - Amt);
+      } else {
+        Known.One <<= BitWidth - Amt;
+        Known.Zero <<= BitWidth - Amt;
+        Known2.One.lshrInPlace(Amt);
+        Known2.Zero.lshrInPlace(Amt);
+      }
+      Known.One |= Known2.One;
+      Known.Zero |= Known2.Zero;
+    }
+    break;
   case ISD::SIGN_EXTEND_INREG: {
     EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     unsigned EBits = EVT.getScalarSizeInBits();
diff --git a/llvm/test/CodeGen/X86/known-bits.ll b/llvm/test/CodeGen/X86/known-bits.ll
index 3f7e127..8f3b983 100644
--- a/llvm/test/CodeGen/X86/known-bits.ll
+++ b/llvm/test/CodeGen/X86/known-bits.ll
@@ -302,17 +302,12 @@
 define i32 @knownbits_fshl(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshl:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $27, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_fshl:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-1, %eax
-; X64-NEXT:    shrdl $27, %edi, %eax
-; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    movl $3, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
   %2 = and i32 %1, 3
@@ -322,17 +317,12 @@
 define i32 @knownbits_fshr(i32 %a0) nounwind {
 ; X32-LABEL: knownbits_fshr:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl $-1, %eax
-; X32-NEXT:    shrdl $5, %ecx, %eax
-; X32-NEXT:    andl $3, %eax
+; X32-NEXT:    movl $3, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: knownbits_fshr:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $-1, %eax
-; X64-NEXT:    shrdl $5, %edi, %eax
-; X64-NEXT:    andl $3, %eax
+; X64-NEXT:    movl $3, %eax
 ; X64-NEXT:    retq
   %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
   %2 = and i32 %1, 3