blob: e15872004ddabc6c27e1a9c375c7924770bb95d1 [file] [log] [blame]
//===--- ManglingUtils.h - Utilities for Swift name mangling ----*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#ifndef SWIFT_DEMANGLING_MANGLINGUTILS_H
#define SWIFT_DEMANGLING_MANGLINGUTILS_H
#include "llvm/ADT/StringRef.h"
#include "swift/Demangling/NamespaceMacros.h"
#include "swift/Demangling/Punycode.h"
namespace swift {
namespace Mangle {
SWIFT_BEGIN_INLINE_NAMESPACE
using llvm::StringRef;
inline bool isLowerLetter(char ch) {
return ch >= 'a' && ch <= 'z';
}
inline bool isUpperLetter(char ch) {
return ch >= 'A' && ch <= 'Z';
}
inline bool isDigit(char ch) {
return ch >= '0' && ch <= '9';
}
inline bool isLetter(char ch) {
return isLowerLetter(ch) || isUpperLetter(ch);
}
/// Returns true if \p ch is a character which defines the begin of a
/// substitution word.
inline bool isWordStart(char ch) {
return !isDigit(ch) && ch != '_' && ch != 0;
}
/// Returns true if \p ch is a character (following \p prevCh) which defines
/// the end of a substitution word.
inline bool isWordEnd(char ch, char prevCh) {
if (ch == '_' || ch == 0)
return true;
if (!isUpperLetter(prevCh) && isUpperLetter(ch))
return true;
return false;
}
/// Returns true if \p ch is a valid character which may appear in a symbol
/// mangling.
inline bool isValidSymbolChar(char ch) {
return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$';
}
/// Returns true if \p str contains any character which may not appear in a
/// mangled symbol string and therefore must be punycode encoded.
bool needsPunycodeEncoding(StringRef str);
/// Returns true if \p str contains any non-ASCII character.
bool isNonAscii(StringRef str);
/// Describes a Word in a mangled identifier.
struct SubstitutionWord {
/// The position of the first word character in the mangled string.
size_t start;
/// The length of the word.
size_t length;
};
/// Helper struct which represents a word substitution.
struct WordReplacement {
/// The position in the identifier where the word is substituted.
size_t StringPos;
/// The index into the mangler's Words array (-1 if invalid).
int WordIdx;
};
/// Translate the given operator character into its mangled form.
///
/// Current operator characters: @/=-+*%<>!&|^~ and the special operator '..'
char translateOperatorChar(char op);
/// Returns a string where all characters of the operator \p Op are translated
/// to their mangled form.
std::string translateOperator(StringRef Op);
/// Returns the standard type kind for an 'S' substitution, e.g. 'i' for "Int".
char getStandardTypeSubst(StringRef TypeName);
/// Mangles an identifier using a generic Mangler class.
///
/// The Mangler class must provide the following:
/// *) Words: An array of SubstitutionWord which holds the current list of
/// found words which can be used for substitutions.
/// *) SubstWordsInIdent: An array of WordReplacement, which is just used
/// as a temporary storage during mangling. Must be empty.
/// *) Buffer: A stream where the mangled identifier is written to.
/// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
/// *) UsePunycode: A flag indicating if punycode encoding should be done.
template <typename Mangler>
void mangleIdentifier(Mangler &M, StringRef ident) {
size_t WordsInBuffer = M.Words.size();
assert(M.SubstWordsInIdent.empty());
if (M.UsePunycode && needsPunycodeEncoding(ident)) {
// If the identifier contains non-ASCII character, we mangle
// with an initial '00' and Punycode the identifier string.
std::string punycodeBuf;
Punycode::encodePunycodeUTF8(ident, punycodeBuf,
/*mapNonSymbolChars*/ true);
StringRef pcIdent = punycodeBuf;
M.Buffer << "00" << pcIdent.size();
if (isDigit(pcIdent[0]) || pcIdent[0] == '_')
M.Buffer << '_';
M.Buffer << pcIdent;
return;
}
// Search for word substitutions and for new words.
const size_t NotInsideWord = ~0;
size_t wordStartPos = NotInsideWord;
for (size_t Pos = 0, Len = ident.size(); Pos <= Len; ++Pos) {
char ch = (Pos < Len ? ident[Pos] : 0);
if (wordStartPos != NotInsideWord && isWordEnd(ch, ident[Pos - 1])) {
// This position is the end of a word, i.e. the next character after a
// word.
assert(Pos > wordStartPos);
size_t wordLen = Pos - wordStartPos;
StringRef Word = ident.substr(wordStartPos, wordLen);
// Helper function to lookup the Word in a string.
auto lookupWord = [&] (StringRef Str,
size_t FromWordIdx, size_t ToWordIdx) -> int {
for (size_t Idx = FromWordIdx; Idx < ToWordIdx; ++Idx) {
const SubstitutionWord &w = M.Words[Idx];
StringRef existingWord = Str.substr(w.start, w.length);
if (Word == existingWord)
return (int)Idx;
}
return -1;
};
// Is the word already present in the so far mangled string?
int WordIdx = lookupWord(M.getBufferStr(), 0, WordsInBuffer);
// Otherwise, is the word already present in this identifier?
if (WordIdx < 0)
WordIdx = lookupWord(ident, WordsInBuffer, M.Words.size());
if (WordIdx >= 0) {
// We found a word substitution!
assert(WordIdx < 26);
M.addSubstWordsInIdent({wordStartPos, WordIdx});
} else if (wordLen >= 2 && M.Words.size() < M.MaxNumWords) {
// It's a new word: remember it.
// Note: at this time the word's start position is relative to the
// begin of the identifier. We must update it afterwards so that it is
// relative to the begin of the whole mangled Buffer.
M.addWord({wordStartPos, wordLen});
}
wordStartPos = NotInsideWord;
}
if (wordStartPos == NotInsideWord && isWordStart(ch)) {
// This position is the begin of a word.
wordStartPos = Pos;
}
}
// If we have word substitutions mangle an initial '0'.
if (!M.SubstWordsInIdent.empty())
M.Buffer << '0';
size_t Pos = 0;
// Add a dummy-word at the end of the list.
M.addSubstWordsInIdent({ident.size(), -1});
// Mangle a sequence of word substitutions and sub-strings.
for (size_t Idx = 0, End = M.SubstWordsInIdent.size(); Idx < End; ++Idx) {
const WordReplacement &Repl = M.SubstWordsInIdent[Idx];
if (Pos < Repl.StringPos) {
// Mangle the sub-string up to the next word substitution (or to the end
// of the identifier - that's why we added the dummy-word).
// The first thing: we add the encoded sub-string length.
bool first = true;
M.Buffer << (Repl.StringPos - Pos);
do {
// Update the start position of new added words, so that they refer to
// the begin of the whole mangled Buffer.
if (WordsInBuffer < M.Words.size() &&
M.Words[WordsInBuffer].start == Pos) {
M.Words[WordsInBuffer].start = M.getBufferStr().size();
WordsInBuffer++;
}
// Error recovery. We sometimes need to mangle identifiers coming
// from invalid code.
if (first && isDigit(ident[Pos]))
M.Buffer << 'X';
// Add a literal character of the sub-string.
else
M.Buffer << ident[Pos];
Pos++;
first = false;
} while (Pos < Repl.StringPos);
}
// Is it a "real" word substitution (and not the dummy-word)?
if (Repl.WordIdx >= 0) {
assert(Repl.WordIdx <= (int)WordsInBuffer);
Pos += M.Words[Repl.WordIdx].length;
if (Idx < End - 2) {
M.Buffer << (char)(Repl.WordIdx + 'a');
} else {
// The last word substitution is a capital letter.
M.Buffer << (char)(Repl.WordIdx + 'A');
if (Pos == ident.size())
M.Buffer << '0';
}
}
}
M.SubstWordsInIdent.clear();
}
/// Utility class for mangling merged substitutions.
///
/// Used in the Mangler and Remangler.
class SubstitutionMerging {
/// The position of the last substitution mangling,
/// e.g. 3 for 'AabC' and 'Aab4C'
size_t lastSubstPosition = 0;
/// The size of the last substitution mangling,
/// e.g. 1 for 'AabC' or 2 for 'Aab4C'
size_t lastSubstSize = 0;
/// The repeat count of the last substitution,
/// e.g. 1 for 'AabC' or 4 for 'Aab4C'
size_t lastNumSubsts = 0;
/// True if the last substitution is an 'S' substitution,
/// false if the last substitution is an 'A' substitution.
bool lastSubstIsStandardSubst = false;
public:
// The only reason to limit the number of repeated substitutions is that we
// don't want that the demangler blows up on a bogus substitution, e.g.
// ...A832456823746582B...
enum { MaxRepeatCount = 2048 };
void clear() {
lastNumSubsts = 0;
}
/// Tries to merge the substitution \p Subst with a previously mangled
/// substitution.
///
/// Returns true on success. In case of false, the caller must mangle the
/// substitution separately in the form 'S<Subst>' or 'A<Subst>'.
///
/// The Mangler class must provide the following:
/// *) Buffer: A stream where the mangled identifier is written to.
/// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
/// *) resetBuffer(size_t): Resets the buffer to an old position.
template <typename Mangler>
bool tryMergeSubst(Mangler &M, char Subst, bool isStandardSubst) {
assert(isUpperLetter(Subst) || (isStandardSubst && isLowerLetter(Subst)));
StringRef BufferStr = M.getBufferStr();
if (lastNumSubsts > 0 && lastNumSubsts < MaxRepeatCount
&& BufferStr.size() == lastSubstPosition + lastSubstSize
&& lastSubstIsStandardSubst == isStandardSubst) {
// The last mangled thing is a substitution.
assert(lastSubstPosition > 0 && lastSubstPosition < BufferStr.size());
assert(lastSubstSize > 0);
char lastSubst = BufferStr.back();
assert(isUpperLetter(lastSubst)
|| (isStandardSubst && isLowerLetter(lastSubst)));
if (lastSubst != Subst && !isStandardSubst) {
// We can merge with a different 'A' substitution,
// e.g. 'AB' -> 'AbC'.
lastSubstPosition = BufferStr.size();
lastNumSubsts = 1;
M.resetBuffer(BufferStr.size() - 1);
assert(isUpperLetter(lastSubst));
M.Buffer << (char)(lastSubst - 'A' + 'a') << Subst;
lastSubstSize = 1;
return true;
}
if (lastSubst == Subst) {
// We can merge with the same 'A' or 'S' substitution,
// e.g. 'AB' -> 'A2B', or 'S3i' -> 'S4i'
lastNumSubsts++;
M.resetBuffer(lastSubstPosition);
M.Buffer << lastNumSubsts;
M.Buffer << Subst;
lastSubstSize = M.getBufferStr().size() - lastSubstPosition;
return true;
}
}
// We can't merge with the previous substitution, but let's remember this
// substitution which will be mangled by the caller.
lastSubstPosition = BufferStr.size() + 1;
lastSubstSize = 1;
lastNumSubsts = 1;
lastSubstIsStandardSubst = isStandardSubst;
return false;
}
};
SWIFT_END_INLINE_NAMESPACE
} // end namespace Mangle
} // end namespace swift
#endif // SWIFT_DEMANGLING_MANGLINGUTILS_H