include/swift/Demangling/ManglingUtils.h - third_party/swift - Git at Google

 //===--- ManglingUtils.h - Utilities for Swift name mangling ----*- C++ -*-===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//

 #ifndef SWIFT_DEMANGLING_MANGLINGUTILS_H
 #define SWIFT_DEMANGLING_MANGLINGUTILS_H

 #include "llvm/ADT/StringRef.h"
 #include "swift/Demangling/Punycode.h"

 namespace swift {
 namespace Mangle {

 using llvm::StringRef;

 inline bool isLowerLetter(char ch) {
   return ch >= 'a' && ch <= 'z';
 }

 inline bool isUpperLetter(char ch) {
   return ch >= 'A' && ch <= 'Z';
 }

 inline bool isDigit(char ch) {
   return ch >= '0' && ch <= '9';
 }

 inline bool isLetter(char ch) {
   return isLowerLetter(ch) || isUpperLetter(ch);
 }

 /// Returns true if \p ch is a character which defines the begin of a
 /// substitution word.
 inline bool isWordStart(char ch) {
   return !isDigit(ch) && ch != '_' && ch != 0;
 }

 /// Returns true if \p ch is a character (following \p prevCh) which defines
 /// the end of a substitution word.
 inline bool isWordEnd(char ch, char prevCh) {
   if (ch == '_' || ch == 0)
     return true;

   if (!isUpperLetter(prevCh) && isUpperLetter(ch))
     return true;

   return false;
 }

 /// Returns true if \p ch is a valid character which may appear in a symbol
 /// mangling.
 inline bool isValidSymbolChar(char ch) {
   return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$';
 }

 /// Returns true if \p str contains any character which may not appear in a
 /// mangled symbol string and therefore must be punycode encoded.
 bool needsPunycodeEncoding(StringRef str);

 /// Returns true if \p str contains any non-ASCII character.
 bool isNonAscii(StringRef str);

 /// Describes a Word in a mangled identifier.
 struct SubstitutionWord {

   /// The position of the first word character in the mangled string.
   size_t start;

   /// The length of the word.
   size_t length;
 };

 /// Helper struct which represents a word substitution.
 struct WordReplacement {
   /// The position in the identifier where the word is substituted.
   size_t StringPos;

   /// The index into the mangler's Words array (-1 if invalid).
   int WordIdx;
 };

 /// Translate the given operator character into its mangled form.
 ///
 /// Current operator characters:   @/=-+*%<>!&|^~ and the special operator '..'
 char translateOperatorChar(char op);

 /// Returns a string where all characters of the operator \p Op are translated
 /// to their mangled form.
 std::string translateOperator(StringRef Op);

 /// Returns the standard type kind for an 'S' substitution, e.g. 'i' for "Int".
 char getStandardTypeSubst(StringRef TypeName);

 /// Mangles an identifier using a generic Mangler class.
 ///
 /// The Mangler class must provide the following:
 /// *) Words: An array of SubstitutionWord which holds the current list of
 ///           found words which can be used for substitutions.
 /// *) SubstWordsInIdent: An array of WordReplacement, which is just used
 ///           as a temporary storage during mangling. Must be empty.
 /// *) Buffer: A stream where the mangled identifier is written to.
 /// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
 /// *) UsePunycode: A flag indicating if punycode encoding should be done.
 template <typename Mangler>
 void mangleIdentifier(Mangler &M, StringRef ident) {

   size_t WordsInBuffer = M.Words.size();
   assert(M.SubstWordsInIdent.empty());
   if (M.UsePunycode && needsPunycodeEncoding(ident)) {
     // If the identifier contains non-ASCII character, we mangle
     // with an initial '00' and Punycode the identifier string.
     std::string punycodeBuf;
     Punycode::encodePunycodeUTF8(ident, punycodeBuf,
                                  /*mapNonSymbolChars*/ true);
     StringRef pcIdent = punycodeBuf;
     M.Buffer << "00" << pcIdent.size();
     if (isDigit(pcIdent[0]) || pcIdent[0] == '_')
       M.Buffer << '_';
     M.Buffer << pcIdent;
     return;
   }
   // Search for word substitutions and for new words.
   const size_t NotInsideWord = ~0;
   size_t wordStartPos = NotInsideWord;
   for (size_t Pos = 0, Len = ident.size(); Pos <= Len; ++Pos) {
     char ch = (Pos < Len ? ident[Pos] : 0);
     if (wordStartPos != NotInsideWord && isWordEnd(ch, ident[Pos - 1])) {
       // This position is the end of a word, i.e. the next character after a
       // word.
       assert(Pos > wordStartPos);
       size_t wordLen = Pos - wordStartPos;
       StringRef Word = ident.substr(wordStartPos, wordLen);

       // Helper function to lookup the Word in a string.
       auto lookupWord = [&] (StringRef Str,
                              size_t FromWordIdx, size_t ToWordIdx) -> int {
         for (size_t Idx = FromWordIdx; Idx < ToWordIdx; ++Idx) {
           const SubstitutionWord &w = M.Words[Idx];
           StringRef existingWord =  Str.substr(w.start, w.length);
           if (Word == existingWord)
             return (int)Idx;
         }
         return -1;
       };

       // Is the word already present in the so far mangled string?
       int WordIdx = lookupWord(M.getBufferStr(), 0, WordsInBuffer);
       // Otherwise, is the word already present in this identifier?
       if (WordIdx < 0)
         WordIdx = lookupWord(ident, WordsInBuffer, M.Words.size());

       if (WordIdx >= 0) {
         // We found a word substitution!
         assert(WordIdx < 26);
         M.addSubstWordsInIdent({wordStartPos, WordIdx});
       } else if (wordLen >= 2 && M.Words.size() < M.MaxNumWords) {
         // It's a new word: remember it.
         // Note: at this time the word's start position is relative to the
         // begin of the identifier. We must update it afterwards so that it is
         // relative to the begin of the whole mangled Buffer.
         M.addWord({wordStartPos, wordLen});
       }
       wordStartPos = NotInsideWord;
     }
     if (wordStartPos == NotInsideWord && isWordStart(ch)) {
       // This position is the begin of a word.
       wordStartPos = Pos;
     }
   }
   // If we have word substitutions mangle an initial '0'.
   if (!M.SubstWordsInIdent.empty())
     M.Buffer << '0';

   size_t Pos = 0;
   // Add a dummy-word at the end of the list.
   M.addSubstWordsInIdent({ident.size(), -1});

   // Mangle a sequence of word substitutions and sub-strings.
   for (size_t Idx = 0, End = M.SubstWordsInIdent.size(); Idx < End; ++Idx) {
     const WordReplacement &Repl = M.SubstWordsInIdent[Idx];
     if (Pos < Repl.StringPos) {
       // Mangle the sub-string up to the next word substitution (or to the end
       // of the identifier - that's why we added the dummy-word).
       // The first thing: we add the encoded sub-string length.
       M.Buffer << (Repl.StringPos - Pos);
       assert(!isDigit(ident[Pos]) &&
              "first char of sub-string may not be a digit");
       do {
         // Update the start position of new added words, so that they refer to
         // the begin of the whole mangled Buffer.
         if (WordsInBuffer < M.Words.size() &&
             M.Words[WordsInBuffer].start == Pos) {
           M.Words[WordsInBuffer].start = M.getBufferStr().size();
           WordsInBuffer++;
         }
         // Add a literal character of the sub-string.
         M.Buffer << ident[Pos];
         Pos++;
       } while (Pos < Repl.StringPos);
     }
     // Is it a "real" word substitution (and not the dummy-word)?
     if (Repl.WordIdx >= 0) {
       assert(Repl.WordIdx <= (int)WordsInBuffer);
       Pos += M.Words[Repl.WordIdx].length;
       if (Idx < End - 2) {
         M.Buffer << (char)(Repl.WordIdx + 'a');
       } else {
         // The last word substitution is a capital letter.
         M.Buffer << (char)(Repl.WordIdx + 'A');
         if (Pos == ident.size())
           M.Buffer << '0';
       }
     }
   }
   M.SubstWordsInIdent.clear();
 }

 /// Utility class for mangling merged substitutions.
 ///
 /// Used in the Mangler and Remangler.
 class SubstitutionMerging {

   /// The position of the last substitution mangling,
   /// e.g. 3 for 'AabC' and 'Aab4C'
   size_t lastSubstPosition = 0;

   /// The size of the last substitution mangling,
   /// e.g. 1 for 'AabC' or 2 for 'Aab4C'
   size_t lastSubstSize = 0;

   /// The repeat count of the last substitution,
   /// e.g. 1 for 'AabC' or 4 for 'Aab4C'
   size_t lastNumSubsts = 0;

   /// True if the last substitution is an 'S' substitution,
   /// false if the last substitution is an 'A' substitution.
   bool lastSubstIsStandardSubst = false;

 public:

   // The only reason to limit the number of repeated substitutions is that we
   // don't want that the demangler blows up on a bogus substitution, e.g.
   // ...A832456823746582B...
   enum { MaxRepeatCount = 2048 };

   void clear() {
     lastNumSubsts = 0;
   }

   /// Tries to merge the substitution \p Subst with a previously mangled
   /// substitution.
   ///
   /// Returns true on success. In case of false, the caller must mangle the
   /// substitution separately in the form 'S<Subst>' or 'A<Subst>'.
   ///
   /// The Mangler class must provide the following:
   /// *) Buffer: A stream where the mangled identifier is written to.
   /// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
   /// *) resetBuffer(size_t): Resets the buffer to an old position.
   template <typename Mangler>
   bool tryMergeSubst(Mangler &M, char Subst, bool isStandardSubst) {
     assert(isUpperLetter(Subst) || (isStandardSubst && isLowerLetter(Subst)));
     StringRef BufferStr = M.getBufferStr();
     if (lastNumSubsts > 0 && lastNumSubsts < MaxRepeatCount
         && BufferStr.size() == lastSubstPosition + lastSubstSize
         && lastSubstIsStandardSubst == isStandardSubst) {

       // The last mangled thing is a substitution.
       assert(lastSubstPosition > 0 && lastSubstPosition < BufferStr.size());
       assert(lastSubstSize > 0);
       char lastSubst = BufferStr.back();
       assert(isUpperLetter(lastSubst)
              || (isStandardSubst && isLowerLetter(lastSubst)));
       if (lastSubst != Subst && !isStandardSubst) {
         // We can merge with a different 'A' substitution,
         // e.g. 'AB' -> 'AbC'.
         lastSubstPosition = BufferStr.size();
         lastNumSubsts = 1;
         M.resetBuffer(BufferStr.size() - 1);
         assert(isUpperLetter(lastSubst));
         M.Buffer << (char)(lastSubst - 'A' + 'a') << Subst;
         lastSubstSize = 1;
         return true;
       }
       if (lastSubst == Subst) {
         // We can merge with the same 'A' or 'S' substitution,
         // e.g. 'AB' -> 'A2B', or 'S3i' -> 'S4i'
         lastNumSubsts++;
         M.resetBuffer(lastSubstPosition);
         M.Buffer << lastNumSubsts;
         M.Buffer << Subst;
         lastSubstSize = M.getBufferStr().size() - lastSubstPosition;
         return true;
       }
     }
     // We can't merge with the previous substitution, but let's remember this
     // substitution which will be mangled by the caller.
     lastSubstPosition = BufferStr.size() + 1;
     lastSubstSize = 1;
     lastNumSubsts = 1;
     lastSubstIsStandardSubst = isStandardSubst;
     return false;
   }
 };

 } // end namespace Mangle
 } // end namespace swift

 #endif // SWIFT_DEMANGLING_MANGLINGUTILS_H
	//===--- ManglingUtils.h - Utilities for Swift name mangling ----- C++ --===//
	//
	// This source file is part of the Swift.org open source project
	//
	// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
	// Licensed under Apache License v2.0 with Runtime Library Exception
	//
	// See https://swift.org/LICENSE.txt for license information
	// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
	//
	//===----------------------------------------------------------------------===//

	#ifndef SWIFT_DEMANGLING_MANGLINGUTILS_H
	#define SWIFT_DEMANGLING_MANGLINGUTILS_H

	#include "llvm/ADT/StringRef.h"
	#include "swift/Demangling/Punycode.h"

	namespace swift {
	namespace Mangle {

	using llvm::StringRef;

	inline bool isLowerLetter(char ch) {
	return ch >= 'a' && ch <= 'z';
	}

	inline bool isUpperLetter(char ch) {
	return ch >= 'A' && ch <= 'Z';
	}

	inline bool isDigit(char ch) {
	return ch >= '0' && ch <= '9';
	}

	inline bool isLetter(char ch) {
	return isLowerLetter(ch) \|\| isUpperLetter(ch);
	}

	/// Returns true if \p ch is a character which defines the begin of a
	/// substitution word.
	inline bool isWordStart(char ch) {
	return !isDigit(ch) && ch != '_' && ch != 0;
	}

	/// Returns true if \p ch is a character (following \p prevCh) which defines
	/// the end of a substitution word.
	inline bool isWordEnd(char ch, char prevCh) {
	if (ch == '_' \|\| ch == 0)
	return true;

	if (!isUpperLetter(prevCh) && isUpperLetter(ch))
	return true;

	return false;
	}

	/// Returns true if \p ch is a valid character which may appear in a symbol
	/// mangling.
	inline bool isValidSymbolChar(char ch) {
	return isLetter(ch) \|\| isDigit(ch) \|\| ch == '_' \|\| ch == '$';
	}

	/// Returns true if \p str contains any character which may not appear in a
	/// mangled symbol string and therefore must be punycode encoded.
	bool needsPunycodeEncoding(StringRef str);

	/// Returns true if \p str contains any non-ASCII character.
	bool isNonAscii(StringRef str);

	/// Describes a Word in a mangled identifier.
	struct SubstitutionWord {

	/// The position of the first word character in the mangled string.
	size_t start;

	/// The length of the word.
	size_t length;
	};

	/// Helper struct which represents a word substitution.
	struct WordReplacement {
	/// The position in the identifier where the word is substituted.
	size_t StringPos;

	/// The index into the mangler's Words array (-1 if invalid).
	int WordIdx;
	};

	/// Translate the given operator character into its mangled form.
	///
	/// Current operator characters: @/=-+*%<>!&\|^~ and the special operator '..'
	char translateOperatorChar(char op);

	/// Returns a string where all characters of the operator \p Op are translated
	/// to their mangled form.
	std::string translateOperator(StringRef Op);

	/// Returns the standard type kind for an 'S' substitution, e.g. 'i' for "Int".
	char getStandardTypeSubst(StringRef TypeName);

	/// Mangles an identifier using a generic Mangler class.
	///
	/// The Mangler class must provide the following:
	/// *) Words: An array of SubstitutionWord which holds the current list of
	/// found words which can be used for substitutions.
	/// *) SubstWordsInIdent: An array of WordReplacement, which is just used
	/// as a temporary storage during mangling. Must be empty.
	/// *) Buffer: A stream where the mangled identifier is written to.
	/// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
	/// *) UsePunycode: A flag indicating if punycode encoding should be done.
	template <typename Mangler>
	void mangleIdentifier(Mangler &M, StringRef ident) {

	size_t WordsInBuffer = M.Words.size();
	assert(M.SubstWordsInIdent.empty());
	if (M.UsePunycode && needsPunycodeEncoding(ident)) {
	// If the identifier contains non-ASCII character, we mangle
	// with an initial '00' and Punycode the identifier string.
	std::string punycodeBuf;
	Punycode::encodePunycodeUTF8(ident, punycodeBuf,
	/mapNonSymbolChars/ true);
	StringRef pcIdent = punycodeBuf;
	M.Buffer << "00" << pcIdent.size();
	if (isDigit(pcIdent[0]) \|\| pcIdent[0] == '_')
	M.Buffer << '_';
	M.Buffer << pcIdent;
	return;
	}
	// Search for word substitutions and for new words.
	const size_t NotInsideWord = ~0;
	size_t wordStartPos = NotInsideWord;
	for (size_t Pos = 0, Len = ident.size(); Pos <= Len; ++Pos) {
	char ch = (Pos < Len ? ident[Pos] : 0);
	if (wordStartPos != NotInsideWord && isWordEnd(ch, ident[Pos - 1])) {
	// This position is the end of a word, i.e. the next character after a
	// word.
	assert(Pos > wordStartPos);
	size_t wordLen = Pos - wordStartPos;
	StringRef Word = ident.substr(wordStartPos, wordLen);

	// Helper function to lookup the Word in a string.
	auto lookupWord = [&] (StringRef Str,
	size_t FromWordIdx, size_t ToWordIdx) -> int {
	for (size_t Idx = FromWordIdx; Idx < ToWordIdx; ++Idx) {
	const SubstitutionWord &w = M.Words[Idx];
	StringRef existingWord = Str.substr(w.start, w.length);
	if (Word == existingWord)
	return (int)Idx;
	}
	return -1;
	};

	// Is the word already present in the so far mangled string?
	int WordIdx = lookupWord(M.getBufferStr(), 0, WordsInBuffer);
	// Otherwise, is the word already present in this identifier?
	if (WordIdx < 0)
	WordIdx = lookupWord(ident, WordsInBuffer, M.Words.size());

	if (WordIdx >= 0) {
	// We found a word substitution!
	assert(WordIdx < 26);
	M.addSubstWordsInIdent({wordStartPos, WordIdx});
	} else if (wordLen >= 2 && M.Words.size() < M.MaxNumWords) {
	// It's a new word: remember it.
	// Note: at this time the word's start position is relative to the
	// begin of the identifier. We must update it afterwards so that it is
	// relative to the begin of the whole mangled Buffer.
	M.addWord({wordStartPos, wordLen});
	}
	wordStartPos = NotInsideWord;
	}
	if (wordStartPos == NotInsideWord && isWordStart(ch)) {
	// This position is the begin of a word.
	wordStartPos = Pos;
	}
	}
	// If we have word substitutions mangle an initial '0'.
	if (!M.SubstWordsInIdent.empty())
	M.Buffer << '0';

	size_t Pos = 0;
	// Add a dummy-word at the end of the list.
	M.addSubstWordsInIdent({ident.size(), -1});

	// Mangle a sequence of word substitutions and sub-strings.
	for (size_t Idx = 0, End = M.SubstWordsInIdent.size(); Idx < End; ++Idx) {
	const WordReplacement &Repl = M.SubstWordsInIdent[Idx];
	if (Pos < Repl.StringPos) {
	// Mangle the sub-string up to the next word substitution (or to the end
	// of the identifier - that's why we added the dummy-word).
	// The first thing: we add the encoded sub-string length.
	M.Buffer << (Repl.StringPos - Pos);
	assert(!isDigit(ident[Pos]) &&
	"first char of sub-string may not be a digit");
	do {
	// Update the start position of new added words, so that they refer to
	// the begin of the whole mangled Buffer.
	if (WordsInBuffer < M.Words.size() &&
	M.Words[WordsInBuffer].start == Pos) {
	M.Words[WordsInBuffer].start = M.getBufferStr().size();
	WordsInBuffer++;
	}
	// Add a literal character of the sub-string.
	M.Buffer << ident[Pos];
	Pos++;
	} while (Pos < Repl.StringPos);
	}
	// Is it a "real" word substitution (and not the dummy-word)?
	if (Repl.WordIdx >= 0) {
	assert(Repl.WordIdx <= (int)WordsInBuffer);
	Pos += M.Words[Repl.WordIdx].length;
	if (Idx < End - 2) {
	M.Buffer << (char)(Repl.WordIdx + 'a');
	} else {
	// The last word substitution is a capital letter.
	M.Buffer << (char)(Repl.WordIdx + 'A');
	if (Pos == ident.size())
	M.Buffer << '0';
	}
	}
	}
	M.SubstWordsInIdent.clear();
	}

	/// Utility class for mangling merged substitutions.
	///
	/// Used in the Mangler and Remangler.
	class SubstitutionMerging {

	/// The position of the last substitution mangling,
	/// e.g. 3 for 'AabC' and 'Aab4C'
	size_t lastSubstPosition = 0;

	/// The size of the last substitution mangling,
	/// e.g. 1 for 'AabC' or 2 for 'Aab4C'
	size_t lastSubstSize = 0;

	/// The repeat count of the last substitution,
	/// e.g. 1 for 'AabC' or 4 for 'Aab4C'
	size_t lastNumSubsts = 0;

	/// True if the last substitution is an 'S' substitution,
	/// false if the last substitution is an 'A' substitution.
	bool lastSubstIsStandardSubst = false;

	public:

	// The only reason to limit the number of repeated substitutions is that we
	// don't want that the demangler blows up on a bogus substitution, e.g.
	// ...A832456823746582B...
	enum { MaxRepeatCount = 2048 };

	void clear() {
	lastNumSubsts = 0;
	}

	/// Tries to merge the substitution \p Subst with a previously mangled
	/// substitution.
	///
	/// Returns true on success. In case of false, the caller must mangle the
	/// substitution separately in the form 'S<Subst>' or 'A<Subst>'.
	///
	/// The Mangler class must provide the following:
	/// *) Buffer: A stream where the mangled identifier is written to.
	/// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
	/// *) resetBuffer(size_t): Resets the buffer to an old position.
	template <typename Mangler>
	bool tryMergeSubst(Mangler &M, char Subst, bool isStandardSubst) {
	assert(isUpperLetter(Subst) \|\| (isStandardSubst && isLowerLetter(Subst)));
	StringRef BufferStr = M.getBufferStr();
	if (lastNumSubsts > 0 && lastNumSubsts < MaxRepeatCount
	&& BufferStr.size() == lastSubstPosition + lastSubstSize
	&& lastSubstIsStandardSubst == isStandardSubst) {

	// The last mangled thing is a substitution.
	assert(lastSubstPosition > 0 && lastSubstPosition < BufferStr.size());
	assert(lastSubstSize > 0);
	char lastSubst = BufferStr.back();
	assert(isUpperLetter(lastSubst)
	\|\| (isStandardSubst && isLowerLetter(lastSubst)));
	if (lastSubst != Subst && !isStandardSubst) {
	// We can merge with a different 'A' substitution,
	// e.g. 'AB' -> 'AbC'.
	lastSubstPosition = BufferStr.size();
	lastNumSubsts = 1;
	M.resetBuffer(BufferStr.size() - 1);
	assert(isUpperLetter(lastSubst));
	M.Buffer << (char)(lastSubst - 'A' + 'a') << Subst;
	lastSubstSize = 1;
	return true;
	}
	if (lastSubst == Subst) {
	// We can merge with the same 'A' or 'S' substitution,
	// e.g. 'AB' -> 'A2B', or 'S3i' -> 'S4i'
	lastNumSubsts++;
	M.resetBuffer(lastSubstPosition);
	M.Buffer << lastNumSubsts;
	M.Buffer << Subst;
	lastSubstSize = M.getBufferStr().size() - lastSubstPosition;
	return true;
	}
	}
	// We can't merge with the previous substitution, but let's remember this
	// substitution which will be mangled by the caller.
	lastSubstPosition = BufferStr.size() + 1;
	lastSubstSize = 1;
	lastNumSubsts = 1;
	lastSubstIsStandardSubst = isStandardSubst;
	return false;
	}
	};

	} // end namespace Mangle
	} // end namespace swift

	#endif // SWIFT_DEMANGLING_MANGLINGUTILS_H