| /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying |
| file Copyright.txt or https://cmake.org/licensing for details. */ |
| #include "cmXMLSafe.h" |
| |
| #include <cstdio> |
| #include <cstring> |
| #include <sstream> |
| |
| #include "cm_utf8.h" |
| |
| cmXMLSafe::cmXMLSafe(const char* s) |
| : Data(s) |
| , Size(static_cast<unsigned long>(strlen(s))) |
| , DoQuotes(true) |
| { |
| } |
| |
| cmXMLSafe::cmXMLSafe(std::string const& s) |
| : Data(s.c_str()) |
| , Size(static_cast<unsigned long>(s.length())) |
| , DoQuotes(true) |
| { |
| } |
| |
| cmXMLSafe& cmXMLSafe::Quotes(bool b) |
| { |
| this->DoQuotes = b; |
| return *this; |
| } |
| |
| std::string cmXMLSafe::str() const |
| { |
| std::ostringstream ss; |
| ss << *this; |
| return ss.str(); |
| } |
| |
| std::ostream& operator<<(std::ostream& os, cmXMLSafe const& self) |
| { |
| char const* first = self.Data; |
| char const* last = self.Data + self.Size; |
| while (first != last) { |
| unsigned int ch; |
| if (const char* next = cm_utf8_decode_character(first, last, &ch)) { |
| // http://www.w3.org/TR/REC-xml/#NT-Char |
| if ((ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD) || |
| (ch >= 0x10000 && ch <= 0x10FFFF) || ch == 0x9 || ch == 0xA || |
| ch == 0xD) { |
| switch (ch) { |
| // Escape XML control characters. |
| case '&': |
| os << "&"; |
| break; |
| case '<': |
| os << "<"; |
| break; |
| case '>': |
| os << ">"; |
| break; |
| case '"': |
| os << (self.DoQuotes ? """ : "\""); |
| break; |
| case '\'': |
| os << (self.DoQuotes ? "'" : "'"); |
| break; |
| case '\r': |
| break; // Ignore CR |
| // Print the UTF-8 character. |
| default: |
| os.write(first, next - first); |
| break; |
| } |
| } else { |
| // Use a human-readable hex value for this invalid character. |
| char buf[16]; |
| snprintf(buf, sizeof(buf), "%X", ch); |
| os << "[NON-XML-CHAR-0x" << buf << "]"; |
| } |
| |
| first = next; |
| } else { |
| ch = static_cast<unsigned char>(*first++); |
| // Use a human-readable hex value for this invalid byte. |
| char buf[16]; |
| snprintf(buf, sizeof(buf), "%X", ch); |
| os << "[NON-UTF-8-BYTE-0x" << buf << "]"; |
| } |
| } |
| return os; |
| } |