[SIL] Add UTF-8 string `SymbolicValue`. (#17235)

Add a UTF-8 string case to `SymbolicValue` and support it as a `graph_op`
attribute value.

Also do some other NFC cleanup.
diff --git a/include/swift/SIL/SILConstants.h b/include/swift/SIL/SILConstants.h
index ab52927..3f1deb2 100644
--- a/include/swift/SIL/SILConstants.h
+++ b/include/swift/SIL/SILConstants.h
@@ -28,6 +28,7 @@
 
 struct APIntSymbolicValue;
 struct APFloatSymbolicValue;
+struct StringSymbolicValue;
 struct AddressSymbolicValue;
 struct AggregateSymbolicValue;
 
@@ -79,16 +80,20 @@
     /// metatype value.
     RK_Inst,
 
-    /// This value is represented with a bump pointer allocated APInt.
+    /// This value is represented with a bump-pointer allocated APInt.
     /// TODO: We could store small integers into the union inline to avoid
     /// allocations if it ever matters.
     RK_Integer,
 
-    /// This value is represented with a bump pointer allocated APFloat.
+    /// This value is represented with a bump-pointer allocated APFloat.
     /// TODO: We could store small floats into the union inline to avoid
     /// allocations if it ever matters.
     RK_Float,
 
+    /// This value is represented with a bump-pointer allocated char array
+    /// representing a UTF-8 encoded string.
+    RK_String,
+
     /// This value is a pointer to a tracked memory location, along with zero
     /// or more indices (tuple indices, struct field indices, etc) into the
     /// value if it is an aggregate.
@@ -135,7 +140,11 @@
 
     /// When this SymbolicValue is of "Float" kind, this pointer stores
     /// information about the APFloat value it holds.
-    APFloatSymbolicValue *float_;
+    APFloatSymbolicValue *floatingPoint;
+
+    /// When this SymbolicValue is of "String" kind, this pointer stores
+    /// information about the StringRef value it holds.
+    StringSymbolicValue *string;
 
     /// When this SymbolicValue is of "Address" kind, this pointer stores
     /// info about the base and the indices for the address.
@@ -239,6 +248,13 @@
 
   APFloat getFloatValue() const;
 
+  /// Returns a SymbolicValue holding a bump-allocated copy of UTF-8 `string`.
+  static SymbolicValue getString(const StringRef string,
+                                 llvm::BumpPtrAllocator &allocator);
+
+  /// Returns the UTF-8 bytes of this value; it must be of String kind.
+  StringRef getStringValue() const;
+
   /// Get a SymbolicValue corresponding to a memory object with an optional
   /// list of indices into it.  This is used by (e.g.) a struct_element_addr
   /// of a stack_alloc.
@@ -262,9 +278,6 @@
 
   ArrayRef<SymbolicValue> getAggregateValue() const;
 
-  // TODO: getStringValue.
-
-
   /// Given that this is an 'Unknown' value, emit diagnostic notes providing
   /// context about what the problem is.
   void emitUnknownDiagnosticNotes();
diff --git a/lib/ParseSIL/ParseSIL.cpp b/lib/ParseSIL/ParseSIL.cpp
index 7fce830..464f4d4 100644
--- a/lib/ParseSIL/ParseSIL.cpp
+++ b/lib/ParseSIL/ParseSIL.cpp
@@ -893,6 +893,7 @@
 /// - A floating point datatype and literal (f64 3.14).
 ///   - The literal value may be in either decimal format or the hexadecimal
 ///     format used by the 'float_literal' instruction.
+/// - A UTF-8 string literal ("foo").
 /// - A metatype (the instance type is parsed) ($Float).
 /// - An aggregate ([i32 1, i64 2, f32 3.0]).
 ///   - Aggregates values represent constant arrays/structs/tuples.
@@ -997,12 +998,10 @@
   }
   // Handle string literals.
   if (P.Tok.is(tok::string_literal)) {
-    // TODO: Uncomment when `getStringValue` is implemented.
-    // StringRef rawString = P.Tok.getText().drop_front().drop_back();
-    // value = SymbolicValue::getStringValue(rawString, allocator);
-    // return false;
-    P.diagnose(P.Tok, diag::sil_graph_op_unhandled_attr_value);
-    return true;
+    StringRef rawString = P.Tok.getText().drop_front().drop_back();
+    value = SymbolicValue::getString(rawString, allocator);
+    P.consumeToken(tok::string_literal);
+    return false;
   }
   // Handle metatypes (the instance type is parsed).
   if (P.Tok.is(tok::sil_dollar)) {
diff --git a/lib/SIL/SILConstants.cpp b/lib/SIL/SILConstants.cpp
index f1f7f5d..d428842 100644
--- a/lib/SIL/SILConstants.cpp
+++ b/lib/SIL/SILConstants.cpp
@@ -64,6 +64,9 @@
     getFloatValue().print(os);
     os << "\n";
     return;
+  case RK_String:
+    os << "string: \"" << getStringValue() << "\"\n";
+    return;
   case RK_Address: {
     os << "address indices = [";
     interleave(getAddressIndices(), [&](unsigned idx) { os << idx; },
@@ -99,6 +102,7 @@
   case RK_Aggregate:    return Aggregate;
   case RK_Integer:      return Integer;
   case RK_Float:        return Float;
+  case RK_String:       return String;
   case RK_Inst:
     auto *inst = value.inst;
     if (isa<IntegerLiteralInst>(inst))
@@ -117,7 +121,7 @@
 
 namespace swift {
 /// This is a representation of an integer value, stored as a trailing array
-/// of words.  Elements of this value are bump pointer allocated.
+/// of words.  Elements of this value are bump-pointer allocated.
 struct alignas(uint64_t) APIntSymbolicValue final
   : private llvm::TrailingObjects<APIntSymbolicValue, uint64_t> {
     friend class llvm::TrailingObjects<APIntSymbolicValue, uint64_t>;
@@ -155,7 +159,6 @@
 };
 } // end namespace swift
 
-
 SymbolicValue SymbolicValue::getInteger(const APInt &value,
                                         llvm::BumpPtrAllocator &allocator) {
   // TODO: Could store these inline in the union in the common case.
@@ -163,7 +166,7 @@
     APIntSymbolicValue::create(value.getBitWidth(),
                                { value.getRawData(), value.getNumWords()},
                                allocator);
-  assert(intValue && "aggregate value must be present");
+  assert(intValue && "Integer value must be present");
   SymbolicValue result;
   result.representationKind = RK_Integer;
   result.value.integer = intValue;
@@ -185,8 +188,8 @@
 //===----------------------------------------------------------------------===//
 
 namespace swift {
-/// This is a representation of an integer value, stored as a trailing array
-/// of words.  Elements of this value are bump pointer allocated.
+/// This is a representation of a floating point value, stored as a trailing
+/// array of words.  Elements of this value are bump-pointer allocated.
 struct alignas(uint64_t) APFloatSymbolicValue final
   : private llvm::TrailingObjects<APFloatSymbolicValue, uint64_t> {
     friend class llvm::TrailingObjects<APFloatSymbolicValue, uint64_t>;
@@ -239,31 +242,96 @@
     APFloatSymbolicValue::create(value.getSemantics(),
                                  { val.getRawData(), val.getNumWords()},
                                  allocator);
-  assert(fpValue && "aggregate value must be present");
+  assert(fpValue && "Floating point value must be present");
   SymbolicValue result;
   result.representationKind = RK_Float;
-  result.value.float_ = fpValue;
+  result.value.floatingPoint = fpValue;
   return result;
 }
 
-
 APFloat SymbolicValue::getFloatValue() const {
   assert(getKind() == Float);
 
   if (representationKind == RK_Float)
-    return value.float_->getValue();
+    return value.floatingPoint->getValue();
 
   assert(representationKind == RK_Inst);
   return cast<FloatLiteralInst>(value.inst)->getValue();
 }
 
 //===----------------------------------------------------------------------===//
+// Strings
+//===----------------------------------------------------------------------===//
+
+namespace swift {
+/// This is a representation of a UTF-8 encoded string, stored as a trailing
+/// array of bytes.  Elements of this value are bump-pointer allocated.
+struct alignas(uint64_t) StringSymbolicValue final
+  : private llvm::TrailingObjects<StringSymbolicValue, char> {
+    friend class llvm::TrailingObjects<StringSymbolicValue, char>;
+
+  /// The number of bytes in the trailing array (mirrors StringRef::size()).
+  const size_t numBytes;
+
+  /// Allocates a StringSymbolicValue from `allocator` and copies the bytes of
+  /// `string` into its trailing array.
+  static StringSymbolicValue *create(const StringRef string,
+                                     llvm::BumpPtrAllocator &allocator) {
+    auto size = StringSymbolicValue::totalSizeToAlloc<char>(string.size());
+    auto rawMem = allocator.Allocate(size, alignof(StringSymbolicValue));
+
+    // Placement initialize the StringSymbolicValue.
+    auto ilv = ::new (rawMem) StringSymbolicValue(string.size());
+    std::uninitialized_copy(string.begin(), string.end(),
+                            ilv->getTrailingObjects<char>());
+    return ilv;
+  }
+
+  /// Returns a view of the trailing bytes; no copy is made.
+  StringRef getValue() const {
+    return {getTrailingObjects<char>(), numBytes};
+  }
+
+  // This is used by the llvm::TrailingObjects base class.
+  size_t numTrailingObjects(OverloadToken<char>) const {
+    return numBytes;
+  }
+private:
+  StringSymbolicValue() = delete;
+  StringSymbolicValue(const StringSymbolicValue &) = delete;
+  explicit StringSymbolicValue(size_t numBytes) : numBytes(numBytes) {}
+};
+} // end namespace swift
+
+/// Wraps a bump-allocated copy of the UTF-8 `string` in a SymbolicValue.
+SymbolicValue SymbolicValue::getString(const StringRef string,
+                                       llvm::BumpPtrAllocator &allocator) {
+  auto *storage = StringSymbolicValue::create(string, allocator);
+  assert(storage && "String value must be present");
+  SymbolicValue symbolic;
+  symbolic.value.string = storage;
+  symbolic.representationKind = RK_String;
+  return symbolic;
+}
+
+/// Returns the UTF-8 bytes of a String-kind value, whichever way it is stored.
+StringRef SymbolicValue::getStringValue() const {
+  assert(getKind() == String);
+
+  if (representationKind != RK_String) {
+    assert(representationKind == RK_Inst);
+    return cast<StringLiteralInst>(value.inst)->getValue();
+  }
+  return value.string->getValue();
+}
+
+//===----------------------------------------------------------------------===//
 // Addresses
 //===----------------------------------------------------------------------===//
 
 namespace swift {
 /// This is a representation of an address value, stored as a base pointer plus
-/// trailing array of indices.  Elements of this value are bump pointer
+/// trailing array of indices.  Elements of this value are bump-pointer
 /// allocated.
 struct alignas(SILValue) AddressSymbolicValue final
   : private llvm::TrailingObjects<AddressSymbolicValue, unsigned> {
diff --git a/lib/SIL/SILPrinter.cpp b/lib/SIL/SILPrinter.cpp
index 33e0dc3..a99c63b 100644
--- a/lib/SIL/SILPrinter.cpp
+++ b/lib/SIL/SILPrinter.cpp
@@ -1199,14 +1199,12 @@
       return;
     }
     case SymbolicValue::String:
-      // TODO: Uncomment when `getStringValue` is implemented.
-      // *this << v.getStringValue();
-      llvm_unreachable("`SymbolicValue.getStringValue` is unimplemented");
-      break;
+      *this << QuotedString(v.getStringValue());
+      return;
     case SymbolicValue::Metatype: {
       auto metatype = cast<AnyMetatypeType>(v.getMetatypeValue());
       *this << SILType::getPrimitiveObjectType(metatype.getInstanceType());
-      break;
+      return;
     }
     case SymbolicValue::Aggregate:
       *this << "[";
@@ -1216,13 +1214,12 @@
         *this << ", ";
       });
       *this << "]";
-      break;
+      return;
     case SymbolicValue::Function:
     case SymbolicValue::Address:
     case SymbolicValue::UninitMemory:
     case SymbolicValue::Unknown:
       llvm_unreachable("Unimplemented SymbolicValue case");
-      break;
     }
   }
 
diff --git a/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp b/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
index 15f6d56..108c61b 100644
--- a/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
+++ b/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
@@ -1497,7 +1497,6 @@
   case SymbolicValue::Address:
     assert(0 && "Shouldn't happen");
   case SymbolicValue::Aggregate:
-  case SymbolicValue::String:
   case SymbolicValue::Function:
     // TODO: Unsupported right now.
     return nullptr;
@@ -1511,6 +1510,9 @@
     return B.createIntegerLiteral(loc, type, symVal.getIntegerValue());
   case SymbolicValue::Float:
     return B.createFloatLiteral(loc, type, symVal.getFloatValue());
+  case SymbolicValue::String:
+    return B.createStringLiteral(loc, symVal.getStringValue(),
+                                 StringLiteralInst::Encoding::UTF8);
   }
 }
 
diff --git a/test/TensorFlow/graph_op_inst.sil b/test/TensorFlow/graph_op_inst.sil
index f2169ac..2fa41c6 100644
--- a/test/TensorFlow/graph_op_inst.sil
+++ b/test/TensorFlow/graph_op_inst.sil
@@ -12,7 +12,8 @@
 bb0:
   %0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float>
   %1 = graph_op "tf.Dummy"() {hex1: f64 0x40091EB851EB851F, hex2: f32 0x4048F5C3} : $Tensor<Float>
-  %3 = graph_op "tf.Dummy"() {float1: f64 3.14, float2: f32 -3.14} : $Tensor<Float>
+  %2 = graph_op "tf.Dummy"() {float1: f64 3.14, float2: f32 -3.14} : $Tensor<Float>
+  %3 = graph_op "tf.Dummy"() {string1: "hello", string2: "world"} : $Tensor<Float>
   %4 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
   %5 = graph_op "tf.Dummy"() {array: [[i8 1, i32 -2], [f32 -1.0, $Float]]} : $Tensor<Float>
   return %0 : $Tensor<Float>
@@ -20,11 +21,12 @@
 
 // CHECK-LABEL: sil @attribute_test : $@convention(thin) () -> Tensor<Float> {
 // CHECK: bb0:
-// CHECK-NEXT:   %0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float> // user: %5
+// CHECK-NEXT:   %0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float>
 // CHECK-NEXT:   %1 = graph_op "tf.Dummy"() {hex1: f64 0x40091EB851EB851F /* 3.1400000000000001 */, hex2: f32 0x4048F5C3 /* 3.1400001 */} : $Tensor<Float>
 // CHECK-NEXT:   %2 = graph_op "tf.Dummy"() {float1: f64 0x40091EB851EB851F /* 3.1400000000000001 */, float2: f32 0xC048F5C3 /* -3.1400001 */} : $Tensor<Float>
-// CHECK-NEXT:   %3 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
-// CHECK-NEXT:   %4 = graph_op "tf.Dummy"() {array: {{\[\[}}i8 1, i32 -2], [f32 0xBF800000 /* -1 */, $Float]]} : $Tensor<Float>
+// CHECK-NEXT:   %3 = graph_op "tf.Dummy"() {string1: "hello", string2: "world"} : $Tensor<Float>
+// CHECK-NEXT:   %4 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
+// CHECK-NEXT:   %5 = graph_op "tf.Dummy"() {array: {{\[\[}}i8 1, i32 -2], [f32 0xBF800000 /* -1 */, $Float]]} : $Tensor<Float>
 // CHECK-NEXT:   return %0 : $Tensor<Float>
 // CHECK-NEXT: }