[SIL] Add UTF-8 string `SymbolicValue`. (#17235)
Add a UTF-8 string case to `SymbolicValue` and support it as a `graph_op`
attribute value.
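Also do some NFC (no functional change) cleanup: rename the `float_` union
member to `floatingPoint`, correct copy-pasted comments and assertion
messages, and hyphenate "bump-pointer" consistently.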
diff --git a/include/swift/SIL/SILConstants.h b/include/swift/SIL/SILConstants.h
index ab52927..3f1deb2 100644
--- a/include/swift/SIL/SILConstants.h
+++ b/include/swift/SIL/SILConstants.h
@@ -28,6 +28,7 @@
struct APIntSymbolicValue;
struct APFloatSymbolicValue;
+struct StringSymbolicValue;
struct AddressSymbolicValue;
struct AggregateSymbolicValue;
@@ -79,16 +80,20 @@
/// metatype value.
RK_Inst,
- /// This value is represented with a bump pointer allocated APInt.
+ /// This value is represented with a bump-pointer allocated APInt.
/// TODO: We could store small integers into the union inline to avoid
/// allocations if it ever matters.
RK_Integer,
- /// This value is represented with a bump pointer allocated APFloat.
+ /// This value is represented with a bump-pointer allocated APFloat.
/// TODO: We could store small floats into the union inline to avoid
/// allocations if it ever matters.
RK_Float,
+ /// This value is represented with a bump-pointer allocated char array
+ /// representing a UTF-8 encoded string.
+ RK_String,
+
/// This value is a pointer to a tracked memory location, along with zero
/// or more indices (tuple indices, struct field indices, etc) into the
/// value if it is an aggregate.
@@ -135,7 +140,11 @@
/// When this SymbolicValue is of "Float" kind, this pointer stores
/// information about the APFloat value it holds.
- APFloatSymbolicValue *float_;
+ APFloatSymbolicValue *floatingPoint;
+
+ /// When this SymbolicValue is of "String" kind, this pointer stores
+ /// information about the StringRef value it holds.
+ StringSymbolicValue *string;
/// When this SymbolicValue is of "Address" kind, this pointer stores
/// info about the base and the indices for the address.
@@ -239,6 +248,13 @@
APFloat getFloatValue() const;
+ /// Returns a SymbolicValue representing a UTF-8 encoded string. The bytes of
+ /// `string` are copied into storage obtained from `allocator`.
+ static SymbolicValue getString(const StringRef string,
+ llvm::BumpPtrAllocator &allocator);
+
+ /// Returns the UTF-8 encoded string underlying this SymbolicValue. This
+ /// value must be of String kind.
+ StringRef getStringValue() const;
+
/// Get a SymbolicValue corresponding to a memory object with an optional
/// list of indices into it. This is used by (e.g.) a struct_element_addr
/// of a stack_alloc.
@@ -262,9 +278,6 @@
ArrayRef<SymbolicValue> getAggregateValue() const;
- // TODO: getStringValue.
-
-
/// Given that this is an 'Unknown' value, emit diagnostic notes providing
/// context about what the problem is.
void emitUnknownDiagnosticNotes();
diff --git a/lib/ParseSIL/ParseSIL.cpp b/lib/ParseSIL/ParseSIL.cpp
index 7fce830..464f4d4 100644
--- a/lib/ParseSIL/ParseSIL.cpp
+++ b/lib/ParseSIL/ParseSIL.cpp
@@ -893,6 +893,7 @@
/// - A floating point datatype and literal (f64 3.14).
/// - The literal value may be in either decimal format or the hexadecimal
/// format used by the 'float_literal' instruction.
+/// - A UTF-8 string literal ("foo").
/// - A metatype (the instance type is parsed) ($Float).
/// - An aggregate ([i32 1, i64 2, f32 3.0]).
/// - Aggregates values represent constant arrays/structs/tuples.
@@ -997,12 +998,10 @@
}
// Handle string literals.
if (P.Tok.is(tok::string_literal)) {
- // TODO: Uncomment when `getStringValue` is implemented.
- // StringRef rawString = P.Tok.getText().drop_front().drop_back();
- // value = SymbolicValue::getStringValue(rawString, allocator);
- // return false;
- P.diagnose(P.Tok, diag::sil_graph_op_unhandled_attr_value);
- return true;
+ StringRef rawString = P.Tok.getText().drop_front().drop_back();
+ value = SymbolicValue::getString(rawString, allocator);
+ P.consumeToken(tok::string_literal);
+ return false;
}
// Handle metatypes (the instance type is parsed).
if (P.Tok.is(tok::sil_dollar)) {
diff --git a/lib/SIL/SILConstants.cpp b/lib/SIL/SILConstants.cpp
index f1f7f5d..d428842 100644
--- a/lib/SIL/SILConstants.cpp
+++ b/lib/SIL/SILConstants.cpp
@@ -64,6 +64,9 @@
getFloatValue().print(os);
os << "\n";
return;
+ case RK_String:
+ os << "string: \"" << getStringValue() << "\"\n";
+ return;
case RK_Address: {
os << "address indices = [";
interleave(getAddressIndices(), [&](unsigned idx) { os << idx; },
@@ -99,6 +102,7 @@
case RK_Aggregate: return Aggregate;
case RK_Integer: return Integer;
case RK_Float: return Float;
+ case RK_String: return String;
case RK_Inst:
auto *inst = value.inst;
if (isa<IntegerLiteralInst>(inst))
@@ -117,7 +121,7 @@
namespace swift {
/// This is a representation of an integer value, stored as a trailing array
-/// of words. Elements of this value are bump pointer allocated.
+/// of words. Elements of this value are bump-pointer allocated.
struct alignas(uint64_t) APIntSymbolicValue final
: private llvm::TrailingObjects<APIntSymbolicValue, uint64_t> {
friend class llvm::TrailingObjects<APIntSymbolicValue, uint64_t>;
@@ -155,7 +159,6 @@
};
} // end namespace swift
-
SymbolicValue SymbolicValue::getInteger(const APInt &value,
llvm::BumpPtrAllocator &allocator) {
// TODO: Could store these inline in the union in the common case.
@@ -163,7 +166,7 @@
APIntSymbolicValue::create(value.getBitWidth(),
{ value.getRawData(), value.getNumWords()},
allocator);
- assert(intValue && "aggregate value must be present");
+ assert(intValue && "Integer value must be present");
SymbolicValue result;
result.representationKind = RK_Integer;
result.value.integer = intValue;
@@ -185,8 +188,8 @@
//===----------------------------------------------------------------------===//
namespace swift {
-/// This is a representation of an integer value, stored as a trailing array
-/// of words. Elements of this value are bump pointer allocated.
+/// This is a representation of a floating point value, stored as a trailing
+/// array of words. Elements of this value are bump-pointer allocated.
struct alignas(uint64_t) APFloatSymbolicValue final
: private llvm::TrailingObjects<APFloatSymbolicValue, uint64_t> {
friend class llvm::TrailingObjects<APFloatSymbolicValue, uint64_t>;
@@ -239,31 +242,96 @@
APFloatSymbolicValue::create(value.getSemantics(),
{ val.getRawData(), val.getNumWords()},
allocator);
- assert(fpValue && "aggregate value must be present");
+ assert(fpValue && "Floating point value must be present");
SymbolicValue result;
result.representationKind = RK_Float;
- result.value.float_ = fpValue;
+ result.value.floatingPoint = fpValue;
return result;
}
-
APFloat SymbolicValue::getFloatValue() const {
assert(getKind() == Float);
if (representationKind == RK_Float)
- return value.float_->getValue();
+ return value.floatingPoint->getValue();
assert(representationKind == RK_Inst);
return cast<FloatLiteralInst>(value.inst)->getValue();
}
//===----------------------------------------------------------------------===//
+// Strings
+//===----------------------------------------------------------------------===//
+
+namespace swift {
+/// This is a representation of a UTF-8 encoded string, stored as a byte count
+/// followed by a trailing array of bytes. Instances are created through
+/// `create`, which obtains their memory from a bump-pointer allocator. Exactly
+/// `numBytes` bytes are stored; no terminating NUL is written.
+struct alignas(uint64_t) StringSymbolicValue final
+ : private llvm::TrailingObjects<StringSymbolicValue, char> {
+ friend class llvm::TrailingObjects<StringSymbolicValue, char>;
+
+ /// The number of bytes in the trailing array.
+ const unsigned numBytes;
+
+ /// Allocates a StringSymbolicValue from `allocator` with room for
+ /// `string.size()` trailing bytes and copies the bytes of `string` into it.
+ static StringSymbolicValue *create(const StringRef string,
+ llvm::BumpPtrAllocator &allocator) {
+ auto size = StringSymbolicValue::totalSizeToAlloc<char>(string.size());
+ auto rawMem = allocator.Allocate(size, alignof(StringSymbolicValue));
+
+ // Placement-initialize the header, then copy the string bytes into the
+ // trailing array.
+ // NOTE(review): `string.size()` is converted to `unsigned` by the
+ // constructor; confirm that strings too long for `unsigned` cannot occur.
+ auto ilv = ::new (rawMem) StringSymbolicValue(string.size());
+ std::uninitialized_copy(string.begin(), string.end(),
+ ilv->getTrailingObjects<char>());
+ return ilv;
+ }
+
+ /// Returns a StringRef over the trailing bytes. The result refers to this
+ /// object's own storage; nothing is copied.
+ StringRef getValue() const {
+ return {
+ getTrailingObjects<char>(), numTrailingObjects(OverloadToken<char>())
+ };
+ }
+
+ // This is used by the llvm::TrailingObjects base class.
+ size_t numTrailingObjects(OverloadToken<char>) const {
+ return numBytes;
+ }
+private:
+ // Default and copy construction are disabled; `create` is the only user of
+ // the remaining constructor within this struct.
+ StringSymbolicValue() = delete;
+ StringSymbolicValue(const StringSymbolicValue &) = delete;
+ StringSymbolicValue(unsigned numBytes) :
+ numBytes(numBytes) {}
+};
+} // end namespace swift
+
+// Creates a SymbolicValue with the RK_String representation whose payload is
+// a StringSymbolicValue built from `string` using `allocator`.
+SymbolicValue SymbolicValue::getString(const StringRef string,
+                                       llvm::BumpPtrAllocator &allocator) {
+  auto *storage = StringSymbolicValue::create(string, allocator);
+  assert(storage && "String value must be present");
+
+  SymbolicValue symbolic;
+  symbolic.value.string = storage;
+  symbolic.representationKind = RK_String;
+  return symbolic;
+}
+
+// Returns the UTF-8 encoded string held by this SymbolicValue, which must be
+// of String kind.
+StringRef SymbolicValue::getStringValue() const {
+  assert(getKind() == String);
+
+  // Not backed by a StringSymbolicValue, so the value must wrap a string
+  // literal instruction.
+  if (representationKind != RK_String) {
+    assert(representationKind == RK_Inst);
+    return cast<StringLiteralInst>(value.inst)->getValue();
+  }
+  return value.string->getValue();
+}
+
+//===----------------------------------------------------------------------===//
// Addresses
//===----------------------------------------------------------------------===//
namespace swift {
/// This is a representation of an address value, stored as a base pointer plus
-/// trailing array of indices. Elements of this value are bump pointer
+/// trailing array of indices. Elements of this value are bump-pointer
/// allocated.
struct alignas(SILValue) AddressSymbolicValue final
: private llvm::TrailingObjects<AddressSymbolicValue, unsigned> {
diff --git a/lib/SIL/SILPrinter.cpp b/lib/SIL/SILPrinter.cpp
index 33e0dc3..a99c63b 100644
--- a/lib/SIL/SILPrinter.cpp
+++ b/lib/SIL/SILPrinter.cpp
@@ -1199,14 +1199,12 @@
return;
}
case SymbolicValue::String:
- // TODO: Uncomment when `getStringValue` is implemented.
- // *this << v.getStringValue();
- llvm_unreachable("`SymbolicValue.getStringValue` is unimplemented");
- break;
+ *this << QuotedString(v.getStringValue());
+ return;
case SymbolicValue::Metatype: {
auto metatype = cast<AnyMetatypeType>(v.getMetatypeValue());
*this << SILType::getPrimitiveObjectType(metatype.getInstanceType());
- break;
+ return;
}
case SymbolicValue::Aggregate:
*this << "[";
@@ -1216,13 +1214,12 @@
*this << ", ";
});
*this << "]";
- break;
+ return;
case SymbolicValue::Function:
case SymbolicValue::Address:
case SymbolicValue::UninitMemory:
case SymbolicValue::Unknown:
llvm_unreachable("Unimplemented SymbolicValue case");
- break;
}
}
diff --git a/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp b/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
index 15f6d56..108c61b 100644
--- a/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
+++ b/lib/SILOptimizer/Mandatory/TFDeabstraction.cpp
@@ -1497,7 +1497,6 @@
case SymbolicValue::Address:
assert(0 && "Shouldn't happen");
case SymbolicValue::Aggregate:
- case SymbolicValue::String:
case SymbolicValue::Function:
// TODO: Unsupported right now.
return nullptr;
@@ -1511,6 +1510,9 @@
return B.createIntegerLiteral(loc, type, symVal.getIntegerValue());
case SymbolicValue::Float:
return B.createFloatLiteral(loc, type, symVal.getFloatValue());
+ case SymbolicValue::String:
+ return B.createStringLiteral(loc, symVal.getStringValue(),
+ StringLiteralInst::Encoding::UTF8);
}
}
diff --git a/test/TensorFlow/graph_op_inst.sil b/test/TensorFlow/graph_op_inst.sil
index f2169ac..2fa41c6 100644
--- a/test/TensorFlow/graph_op_inst.sil
+++ b/test/TensorFlow/graph_op_inst.sil
@@ -12,7 +12,8 @@
bb0:
%0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float>
%1 = graph_op "tf.Dummy"() {hex1: f64 0x40091EB851EB851F, hex2: f32 0x4048F5C3} : $Tensor<Float>
- %3 = graph_op "tf.Dummy"() {float1: f64 3.14, float2: f32 -3.14} : $Tensor<Float>
+ %2 = graph_op "tf.Dummy"() {float1: f64 3.14, float2: f32 -3.14} : $Tensor<Float>
+ %3 = graph_op "tf.Dummy"() {string1: "hello", string2: "world"} : $Tensor<Float>
%4 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
%5 = graph_op "tf.Dummy"() {array: [[i8 1, i32 -2], [f32 -1.0, $Float]]} : $Tensor<Float>
return %0 : $Tensor<Float>
@@ -20,11 +21,12 @@
// CHECK-LABEL: sil @attribute_test : $@convention(thin) () -> Tensor<Float> {
// CHECK: bb0:
-// CHECK-NEXT: %0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float> // user: %5
+// CHECK-NEXT: %0 = graph_op "tf.Dummy"() {int1: i32 -3, int2: i8 4, int3: i64 42} : $Tensor<Float>
// CHECK-NEXT: %1 = graph_op "tf.Dummy"() {hex1: f64 0x40091EB851EB851F /* 3.1400000000000001 */, hex2: f32 0x4048F5C3 /* 3.1400001 */} : $Tensor<Float>
// CHECK-NEXT: %2 = graph_op "tf.Dummy"() {float1: f64 0x40091EB851EB851F /* 3.1400000000000001 */, float2: f32 0xC048F5C3 /* -3.1400001 */} : $Tensor<Float>
-// CHECK-NEXT: %3 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
-// CHECK-NEXT: %4 = graph_op "tf.Dummy"() {array: {{\[\[}}i8 1, i32 -2], [f32 0xBF800000 /* -1 */, $Float]]} : $Tensor<Float>
+// CHECK-NEXT: %3 = graph_op "tf.Dummy"() {string1: "hello", string2: "world"} : $Tensor<Float>
+// CHECK-NEXT: %4 = graph_op "tf.Dummy"() {metatype1: $Float, metatype2: $Tensor<Float>} : $Tensor<Float>
+// CHECK-NEXT: %5 = graph_op "tf.Dummy"() {array: {{\[\[}}i8 1, i32 -2], [f32 0xBF800000 /* -1 */, $Float]]} : $Tensor<Float>
// CHECK-NEXT: return %0 : $Tensor<Float>
// CHECK-NEXT: }