| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See http://swift.org/LICENSE.txt for license information |
| // See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| |
| |
| /* CFURLComponents_URIParser.c |
| Copyright (c) 2015, Apple Inc. All rights reserved. |
| Responsibility: Jim Luther/Chris Linn |
| */ |
| |
| #include <CoreFoundation/CFBase.h> |
| #include <CoreFoundation/CFString.h> |
| #include "CFURLComponents_Internal.h" |
| #include "CFInternal.h" |
| |
| typedef CF_ENUM(CFIndex, URLPredefinedCharacterSet) { |
| kURLUserAllowedCharacterSet = 0, |
| kURLPasswordAllowedCharacterSet = 1, |
| kURLHostAllowedCharacterSet = 2, |
| kURLPathAllowedCharacterSet = 3, |
| kURLQueryAllowedCharacterSet = 4, |
| kURLFragmentAllowedCharacterSet = 5, |
| kURLAllowedCharacterSetIllegal = 6 |
| }; |
| |
| // IMPORTANT: the kURLxxxxAllowedCharacters definitions MUST match sURLAllowedCharacters (except for the '[', ':' and ']' characters in kURLHostAllowedCharacters are not kURLHostAllowed, and ';' is not in kURLPathAllowedCharacters, but is special cased in kURLPathAllowed) |
| #define kURLUserAllowedCharacters "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" |
| #define kURLPasswordAllowedCharacters "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" |
| #define kURLHostAllowedCharacters "!$&'()*+,-.0123456789:;=ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~" |
| #define kURLPathAllowedCharacters "!$&'()*+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" // ";" is disallowed in paths for compatibility with API which uses rfc1808 parsing where ";" was the separator between path and param. ":" is allowed only after a "/" (it cannot be in the first segment in some cases) |
| #define kURLQueryAllowedCharacters "!$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" |
| #define kURLFragmentAllowedCharacters "!$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" |
| |
| // IMPORTANT: the kURLxxxxAllowedCharacters definitions MUST match sURLAllowedCharacters (except for the '[', ':' and ']' characters in kURLHostAllowedCharacters are not kURLHostAllowed) |
| static const unsigned short sURLAllowedCharacters[128] = { |
| /* nul */ 0, |
| /* soh */ 0, |
| /* stx */ 0, |
| /* etx */ 0, |
| /* eot */ 0, |
| /* enq */ 0, |
| /* ack */ 0, |
| /* bel */ 0, |
| /* bs */ 0, |
| /* ht */ 0, |
| /* nl */ 0, |
| /* vt */ 0, |
| /* np */ 0, |
| /* cr */ 0, |
| /* so */ 0, |
| /* si */ 0, |
| /* dle */ 0, |
| /* dc1 */ 0, |
| /* dc2 */ 0, |
| /* dc3 */ 0, |
| /* dc4 */ 0, |
| /* nak */ 0, |
| /* syn */ 0, |
| /* etb */ 0, |
| /* can */ 0, |
| /* em */ 0, |
| /* sub */ 0, |
| /* esc */ 0, |
| /* fs */ 0, |
| /* gs */ 0, |
| /* rs */ 0, |
| /* us */ 0, |
| /* sp */ 0, |
| /* '!' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '"' */ 0, |
| /* '#' */ 0, |
| /* '$' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '%' */ 0, |
| /* '&' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* ''' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '(' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* ')' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '*' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '+' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* ',' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '-' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '.' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '/' */ kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '0' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '1' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '2' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '3' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '4' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '5' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '6' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '7' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '8' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* '9' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPortAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed, |
| /* ':' */ kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* ';' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '<' */ 0, |
| /* '=' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '>' */ 0, |
| /* '?' */ kURLQueryAllowed | kURLFragmentAllowed, |
| /* '@' */ kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* 'A' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'B' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'C' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'D' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'E' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'F' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'G' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'H' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'I' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'J' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'K' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'L' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'M' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'N' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'O' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'P' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'Q' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'R' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'S' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'T' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'U' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'V' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'W' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'X' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'Y' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'Z' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* '[' */ 0, |
| /* '\' */ 0, |
| /* ']' */ 0, |
| /* '^' */ 0, |
| /* '_' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* '`' */ 0, |
| /* 'a' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'b' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'c' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'd' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'e' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'f' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLHexDigAllowed | kURLAlphaAllowed, |
| /* 'g' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'h' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'i' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'j' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'k' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'l' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'm' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'n' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'o' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'p' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'q' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'r' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 's' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 't' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'u' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'v' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'w' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'x' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'y' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* 'z' */ kURLSchemeAllowed | kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed | kURLAlphaAllowed, |
| /* '{' */ 0, |
| /* '|' */ 0, |
| /* '}' */ 0, |
| /* '~' */ kURLUserAllowed | kURLPasswordAllowed | kURLHostAllowed | kURLPathAllowed | kURLQueryAllowed | kURLFragmentAllowed, |
| /* del */ 0, |
| }; |
| |
| // an array of CFCharacterSetRef for each of the predefined allowed character sets |
| static CFCharacterSetRef *sURLAllowedCharacterSets = NULL; |
| |
| /* Create the sURLAllowedCharacterSets array. |
| */ |
| static void InitializeURLAllowedCharacterSets(void) |
| { |
| static dispatch_once_t static_init = 0; |
| dispatch_once(&static_init, ^{ |
| sURLAllowedCharacterSets = (CFCharacterSetRef *)CFAllocatorAllocate(kCFAllocatorDefault, sizeof(CFCharacterSetRef) * kURLAllowedCharacterSetIllegal, 0); |
| sURLAllowedCharacterSets[kURLUserAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLUserAllowedCharacters)); |
| sURLAllowedCharacterSets[kURLPasswordAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLPasswordAllowedCharacters)); |
| sURLAllowedCharacterSets[kURLHostAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLHostAllowedCharacters)); |
| sURLAllowedCharacterSets[kURLPathAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLPathAllowedCharacters)); |
| sURLAllowedCharacterSets[kURLQueryAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLQueryAllowedCharacters)); |
| sURLAllowedCharacterSets[kURLFragmentAllowedCharacterSet] = CFCharacterSetCreateWithCharactersInString(kCFAllocatorDefault, CFSTR(kURLFragmentAllowedCharacters)); |
| }); |
| } |
| |
| /* Returns the set from sURLAllowedCharacterSets for the given URLPredefinedCharacterSet. |
| */ |
| static CFCharacterSetRef GetURLAllowedCharacterSet(URLPredefinedCharacterSet allowedSet) |
| { |
| CFCharacterSetRef result; |
| |
| // make sure sURLAllowedCharacterSets is initialized |
| InitializeURLAllowedCharacterSets(); |
| // return the URLPredefinedCharacterSet requested |
| if ( (allowedSet >= 0) && (allowedSet < kURLAllowedCharacterSetIllegal) ) { |
| result = sURLAllowedCharacterSets[allowedSet]; |
| } |
| else { |
| result = nil; |
| } |
| return ( result ); |
| } |
| |
| /* Returns the URLPredefinedCharacterSet that matches the set, or kURLAllowedCharacterSetIllegal if there is no match. |
| */ |
| static URLPredefinedCharacterSet GetURLPredefinedCharacterSet(CFCharacterSetRef characterSet) |
| { |
| URLPredefinedCharacterSet result; |
| |
| // make sure sURLAllowedCharacterSets is initialized |
| InitializeURLAllowedCharacterSets(); |
| // see if characterSet is one of the URLPredefinedCharacterSets |
| if ( characterSet ) { |
| for ( result = 0; result < kURLAllowedCharacterSetIllegal; ++result ) { |
| // yes, I really want a pointer comparison because some of the sURLAllowedCharacterSets have the same bitmaps |
| if ( characterSet == sURLAllowedCharacterSets[result] ) { |
| break; |
| } |
| } |
| } |
| else { |
| result = kURLAllowedCharacterSetIllegal; |
| } |
| |
| return ( result ); |
| } |
| |
| |
| //****************************************************************************** |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLUserAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLUserAllowedCharacterSet) ); |
| } |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLPasswordAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLPasswordAllowedCharacterSet) ); |
| } |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLHostAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLHostAllowedCharacterSet) ); |
| } |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLPathAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLPathAllowedCharacterSet) ); |
| } |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLQueryAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLQueryAllowedCharacterSet) ); |
| } |
| |
| CF_EXPORT CFCharacterSetRef _CFURLComponentsGetURLFragmentAllowedCharacterSet() { |
| return ( GetURLAllowedCharacterSet(kURLFragmentAllowedCharacterSet) ); |
| } |
| |
| |
| #if 0 |
| #pragma mark - |
| #pragma mark CFString extensions |
| #endif |
| |
| CF_EXPORT CFStringRef _CFStringCreateByAddingPercentEncodingWithAllowedCharacters(CFAllocatorRef alloc, CFStringRef string, CFCharacterSetRef allowedCharacters) { |
| CFStringRef result = NULL; |
| CFIndex inLength = CFStringGetLength((CFStringRef)string); |
| |
| if ( inLength == 0 ) { |
| // No characters? Nothing to percent-encode. |
| result = CFStringCreateCopy(alloc, string); |
| } |
| else { |
| static const UInt8 hexchars[] = "0123456789ABCDEF"; |
| CFIndex maxBufferSize = CFStringGetMaximumSizeForEncoding(inLength, kCFStringEncodingUTF8); |
| enum { |
| kStackBufferSize = 4096, |
| kInStackBufferSize = kStackBufferSize / 4, |
| kOutStackBufferSize = kInStackBufferSize * 3, |
| }; |
| STACK_BUFFER_DECL(UInt8, stackBuffer, kStackBufferSize); |
| UInt8 *inBuf; |
| UInt8 *outBuf; |
| // choose a buffer to put the input bytes AND output bytes into |
| if ( maxBufferSize <= kInStackBufferSize ) { |
| inBuf = &stackBuffer[0]; |
| } |
| else { |
| // not big enough? malloc it. |
| inBuf = (UInt8 *)malloc(maxBufferSize * 4); |
| } |
| if ( inBuf ) { |
| CFIndex charsConverted; |
| CFIndex inLen; |
| // use the other 3/4 of the buffer for the percent-encoded bytes |
| outBuf = &inBuf[maxBufferSize]; |
| |
| charsConverted = CFStringGetBytes(string, CFRangeMake(0, inLength), kCFStringEncodingUTF8, 0, false, inBuf, maxBufferSize, &inLen); |
| if ( charsConverted ) { |
| UInt8 *inBytePtr = inBuf; |
| UInt8 *outBytePtr = outBuf; |
| CFIndex idx; |
| |
| // there are two very similar loops below -- they aren't combined because I didn't want an extra comparison per character to determine which code was going to set the allowed variable. |
| |
| URLPredefinedCharacterSet allowedSet = GetURLPredefinedCharacterSet(allowedCharacters); |
| if ( allowedSet != kURLAllowedCharacterSetIllegal ) { |
| // fastest -- allowedCharacters is one of the predefined sets so use sURLAllowedCharacters to determine what characters are allowed |
| unsigned char allowedMask; |
| Boolean isIPLiteral = false; |
| |
| // determine the allowedMask |
| switch (allowedSet) { |
| case kURLUserAllowedCharacterSet: |
| allowedMask = kURLUserAllowed; |
| break; |
| case kURLPasswordAllowedCharacterSet: |
| allowedMask = kURLPasswordAllowed; |
| break; |
| case kURLHostAllowedCharacterSet: |
| allowedMask = kURLHostAllowed; |
| // if the host is an IP-Literal, percent-encode everything within the brackets but not the brackets |
| if ( (inLen >= 2) && (*inBytePtr == '[') && (inBytePtr[inLen - 1] == ']') ) { |
| isIPLiteral = true; |
| ++inBytePtr; |
| // copy the open bracket |
| *outBytePtr++ = '['; |
| inLen -= 2; |
| } |
| break; |
| case kURLPathAllowedCharacterSet: |
| allowedMask = kURLPathAllowed; |
| break; |
| case kURLQueryAllowedCharacterSet: |
| allowedMask = kURLQueryAllowed; |
| break; |
| case kURLFragmentAllowedCharacterSet: |
| allowedMask = kURLFragmentAllowed; |
| break; |
| default: |
| // GetURLPredefinedCharacterSet will return one of the above or kURLAllowedCharacterSetIllegal so this will never be hit |
| allowedMask = 0; |
| break; |
| } |
| if ( allowedSet == kURLPathAllowedCharacterSet ) { |
| Boolean pastSlash = false; |
| for ( idx = 0; idx < inLen; ++idx ) { |
| UInt8 ch = *inBytePtr++; |
| if ( pastSlash ) { |
| // !!!: percent encode ';' for backwards compatibility with API which uses rfc1808 parsing |
| Boolean allowed = (ch <= 127) && (ch != ';') && ((sURLAllowedCharacters[ch] & allowedMask) != 0); |
| if ( allowed ) { |
| *outBytePtr++ = ch; |
| } |
| else { |
| *outBytePtr++ = '%'; |
| *outBytePtr++ = hexchars[ch >> 4]; |
| *outBytePtr++ = hexchars[ch & 0x0f]; |
| } |
| } |
| else { |
| if ( ch == '/' ) { |
| pastSlash = true; |
| } |
| // !!!: percent encode ';' for backwards compatibility with API which uses rfc1808 parsing |
| Boolean allowed = (ch <= 127) && (ch != ';') && (ch != ':') && ((sURLAllowedCharacters[ch] & allowedMask) != 0); |
| if ( allowed ) { |
| *outBytePtr++ = ch; |
| } |
| else { |
| *outBytePtr++ = '%'; |
| *outBytePtr++ = hexchars[ch >> 4]; |
| *outBytePtr++ = hexchars[ch & 0x0f]; |
| } |
| } |
| } |
| } |
| else if ( allowedSet == kURLHostAllowedCharacterSet ) { |
| for ( idx = 0; idx < inLen; ++idx ) { |
| UInt8 ch = *inBytePtr++; |
| Boolean allowed = (ch <= 127) && ((sURLAllowedCharacters[ch] & allowedMask) != 0); |
| if ( allowed || (isIPLiteral && ch == ':') ) { // the colon is allowed in IP-Literal |
| *outBytePtr++ = ch; |
| } |
| else { |
| *outBytePtr++ = '%'; |
| *outBytePtr++ = hexchars[ch >> 4]; |
| *outBytePtr++ = hexchars[ch & 0x0f]; |
| } |
| } |
| if ( isIPLiteral ) { |
| // copy the close bracket |
| *outBytePtr++ = ']'; |
| } |
| } |
| else { |
| for ( idx = 0; idx < inLen; ++idx ) { |
| UInt8 ch = *inBytePtr++; |
| Boolean allowed = (ch <= 127) && ((sURLAllowedCharacters[ch] & allowedMask) != 0); |
| if ( allowed ) { |
| *outBytePtr++ = ch; |
| } |
| else { |
| *outBytePtr++ = '%'; |
| *outBytePtr++ = hexchars[ch >> 4]; |
| *outBytePtr++ = hexchars[ch & 0x0f]; |
| } |
| } |
| } |
| } |
| else { |
| // use the allowedCharacters NSCharacterSet to determine what characters are allowed |
| // non-ASCII characters are ignored |
| for ( idx = 0; idx < inLen; ++idx ) { |
| UInt8 ch = *inBytePtr++; |
| // CFCharacterSet |
| Boolean allowed = (ch <= 127) && CFCharacterSetIsCharacterMember((CFCharacterSetRef)allowedCharacters, ch); |
| if ( allowed ) { |
| *outBytePtr++ = ch; |
| } |
| else { |
| *outBytePtr++ = '%'; |
| *outBytePtr++ = hexchars[ch >> 4]; |
| *outBytePtr++ = hexchars[ch & 0x0f]; |
| } |
| } |
| } |
| |
| result = CFStringCreateWithBytes(kCFAllocatorDefault, outBuf, outBytePtr - outBuf, kCFStringEncodingUTF8, false); |
| } |
| |
| if ( inBuf != stackBuffer ) { |
| free(inBuf); |
| } |
| } |
| } |
| return ( result ); |
| } |
| |
| CF_EXPORT CFStringRef _CFStringCreateByRemovingPercentEncoding(CFAllocatorRef alloc, CFStringRef string) { |
| CFStringRef result = NULL; |
| CFIndex strLength = CFStringGetLength(string); |
| if ( strLength ) { |
| CFIndex maxBufferSize = CFStringGetMaximumSizeForEncoding(strLength, kCFStringEncodingUTF8); |
| enum { |
| kStackBufferSize = 4096, |
| kHalfStackBufferSize = kStackBufferSize / 2, |
| |
| }; |
| STACK_BUFFER_DECL(UInt8, stackBuffer, kStackBufferSize); |
| UInt8 *encodedBuf; |
| UInt8 *decodedBuf; |
| // choose a buffer to put the percent-encoded bytes AND to percent-decode into |
| if ( maxBufferSize <= kHalfStackBufferSize ) { |
| encodedBuf = &stackBuffer[0]; |
| } |
| else { |
| // not big enough? malloc it. |
| encodedBuf = (UInt8 *)malloc(maxBufferSize * 2); |
| } |
| if ( encodedBuf ) { |
| CFIndex charsConverted; |
| CFIndex usedBufLen; |
| // use the other half of the buffer for the percent-decoded bytes |
| decodedBuf = &encodedBuf[maxBufferSize]; |
| charsConverted = CFStringGetBytes(string, CFRangeMake(0, strLength), kCFStringEncodingUTF8, 0, false, encodedBuf, maxBufferSize, &usedBufLen); |
| if ( charsConverted ) { |
| // 0x80 marks invalid hex digits so this table can validate the digits while getting the values |
| static const UInt8 hexvalues[] = { |
| /* 00 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 08 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 10 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 18 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 20 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 28 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| /* 38 */ 0x08, 0x09, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 40 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, |
| /* 48 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 50 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 58 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 60 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, |
| /* 68 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 70 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 78 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| |
| /* 80 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 88 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 90 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* 98 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* A0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* A8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* B0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* B8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* C0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* C8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* D0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* D8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* E0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* E8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* F0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| /* F8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, |
| }; |
| UInt8 *bufStartPtr; |
| UInt8 *bufPtr; |
| const UInt8 *bytePtr = encodedBuf; |
| CFIndex idx; |
| |
| bufPtr = bufStartPtr = decodedBuf; |
| Boolean conversionOK = TRUE; |
| |
| for ( idx = 0; (idx < usedBufLen) && conversionOK; ++idx ) { |
| switch ( *bytePtr ) { |
| case '%': |
| idx += 2; |
| if ( idx < usedBufLen ) { |
| UInt8 hex1, hex2; |
| // skip over % |
| bytePtr++; |
| // get the hex digits |
| hex1 = hexvalues[*bytePtr++]; |
| hex2 = hexvalues[*bytePtr++]; |
| // validate them |
| if ( ((hex1 | hex2) & 0x80) == 0 ) { |
| // convert hex digits |
| *bufPtr = (hex1 << 4) + hex2; |
| } |
| else { |
| conversionOK = FALSE; |
| } |
| } |
| else { |
| conversionOK = FALSE; |
| } |
| break; |
| default: |
| // copy everything else |
| *bufPtr = *bytePtr++; |
| break; |
| } |
| ++bufPtr; |
| } |
| if ( conversionOK ) { |
| result = CFStringCreateWithBytes(kCFAllocatorDefault, decodedBuf, bufPtr - bufStartPtr, kCFStringEncodingUTF8, false); |
| } |
| } |
| |
| // free the buffer if we malloc'd it |
| if ( encodedBuf != &stackBuffer[0] ) { |
| free(encodedBuf); |
| } |
| } |
| } |
| else { |
| result = CFStringCreateCopy(alloc, string); |
| } |
| return ( result ); |
| } |
| |
| |
| #if 0 |
| #pragma mark - |
| #pragma mark URI Parser |
| #endif |
| |
| /* |
| * Called by ParseURIReference to parse the authority component to find the |
| * userInfo, host and port. The results of the parse are returned in the fields |
| * of parseInfo. |
| */ |
| static inline void _ParseAuthority(CFStringInlineBuffer *buf, unsigned long authorityLocation, unsigned long authorityLength, struct _URIParseInfo *parseInfo) |
| { |
| Boolean doneParsingComponent; |
| Boolean isIPLiteral; |
| unsigned long currentCharIndex; |
| unsigned long endCharIndex; |
| UniChar currentUniChar; |
| unsigned long userinfoCharIndex; |
| |
| currentCharIndex = authorityLocation; |
| endCharIndex = authorityLocation + authorityLength; |
| |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| |
| // |
| // find userinfo |
| // |
| userinfoCharIndex = currentCharIndex; |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentCharIndex != endCharIndex ) { |
| if ( currentUniChar == '@' ) { |
| // parse the userinfo |
| unsigned long userinfoEndCharIndex = currentCharIndex; |
| Boolean doneParsingUserInfo; |
| UniChar userinfoUniChar; |
| |
| userinfoUniChar = CFStringGetCharacterFromInlineBuffer(buf, userinfoCharIndex); |
| |
| // there's always a userinfoName if there's userinfo... it just might be zero length |
| parseInfo->userinfoNameExists = true; |
| parseInfo->userinfoNameOffset = userinfoCharIndex; |
| |
| // find end of userinfoName and determine if there's a userinfoPassword |
| doneParsingUserInfo = false; |
| while ( !doneParsingUserInfo ) { |
| if ( userinfoCharIndex != userinfoEndCharIndex ) { |
| if ( userinfoUniChar == ':' ) { |
| parseInfo->userinfoPasswordExists = true; |
| parseInfo->userinfoPasswordOffset = userinfoCharIndex + 1; |
| doneParsingUserInfo = true; |
| } |
| else { |
| ++userinfoCharIndex; |
| userinfoUniChar = CFStringGetCharacterFromInlineBuffer(buf, userinfoCharIndex); |
| } |
| } |
| else { |
| doneParsingUserInfo = true; |
| } |
| } |
| |
| // next character |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| else { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| } |
| } |
| else { |
| // there was no userinfo -- reset currentChar and result |
| currentCharIndex = userinfoCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| } |
| |
| // |
| // find host |
| // |
| // there's always a host if there's an authority... it just might be zero length |
| parseInfo->hostExists = true; |
| parseInfo->hostOffset = currentCharIndex; |
| |
| isIPLiteral = ( currentUniChar == '[' ); |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentCharIndex != endCharIndex ) { |
| if ( isIPLiteral ) { |
| if ( currentUniChar == ']' ) { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| else { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| } |
| } |
| else { |
| if ( currentUniChar == ':' ) { |
| doneParsingComponent = true; |
| } |
| else { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(buf, currentCharIndex); |
| } |
| } |
| } |
| else { |
| doneParsingComponent = true; |
| } |
| } |
| |
| // |
| // find port |
| // |
| if ( (currentCharIndex != endCharIndex) && (currentUniChar == ':') ) { |
| parseInfo->portExists = true; |
| parseInfo->portOffset = currentCharIndex + 1; |
| |
| // no need to find the end of the port unless we're validating legal characters |
| } |
| // else no port |
| } |
| |
| |
| /* |
| * Parse the uriReference find the scheme, authority, path, query and fragment |
| * components and their subcomponents. The results of the parse are returned in |
| * the fields of parseInfo. |
| */ |
| CF_PRIVATE Boolean _CFURIParserParseURIReference(CFStringRef urlString, struct _URIParseInfo *parseInfo) |
| { |
| Boolean doneParsingComponent; |
| |
| CFStringInlineBuffer buf; |
| unsigned long currentCharIndex; |
| unsigned long urlStringLength = CFStringGetLength(urlString); |
| UniChar currentUniChar; |
| |
| // clear the parseInfo |
| bzero(parseInfo, sizeof(*parseInfo)); |
| |
| // Make sure the URL string isn't too long. We're limiting it to 2GB for backwards compatibility with 32-bit excutables using NS/CFURL |
| if ( (urlStringLength > 0) && (urlStringLength <= INT_MAX) ) |
| { |
| CFStringInitInlineBuffer(urlString, &buf, CFRangeMake(0, urlStringLength)); |
| |
| parseInfo->endOffset = urlStringLength; |
| |
| // |
| // find scheme |
| // |
| |
| currentCharIndex = 0; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentUniChar == 0 ) { |
| doneParsingComponent = true; |
| // there was no scheme so this is a relative-ref -- reset currentChar and we're done looking for a scheme |
| currentCharIndex = 0; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| } |
| else { |
| switch ( currentUniChar ) { |
| case ':': |
| // !!!: This checks to make sure the scheme is at least 1 character. However, it makes this parser completely different than CFURL's parser when the string starts with a ":" character. |
| if ( currentCharIndex > 0 ) { |
| parseInfo->schemeExists = true; |
| // the scheme's offset is always 0 |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| else { |
| // there were no valid scheme characters before the ':' -- reset currentChar and we're done looking for a scheme |
| currentCharIndex = 0; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| break; |
| // !!!: These cases are commented out because default handles them. The scheme is validated as the URI string is parsed (unlike CFURL's parser). |
| // case '/': |
| // case '?': |
| // case '#': |
| // // there was no scheme so this is a relative-ref -- reset currentChar and we're done looking for a scheme |
| // currentCharIndex = 0; |
| // currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| // doneParsingComponent = true; |
| // break; |
| default: |
| if ( (currentUniChar <= 127) && ((sURLAllowedCharacters[currentUniChar] & kURLSchemeAllowed) != 0) ) { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| } |
| else { |
| // invalid scheme characters -- reset currentChar and we're done looking for a scheme |
| currentCharIndex = 0; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| doneParsingComponent = true; |
| } |
| break; |
| } |
| } |
| } |
| |
| // |
| // find authority |
| // |
| if ( (currentUniChar == '/') && (CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex + 1) == '/') ) { |
| unsigned long firstComponentCharIndex; |
| unsigned long componentLength; |
| |
| currentCharIndex += 2; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| |
| parseInfo->authorityExists = true; |
| |
| firstComponentCharIndex = currentCharIndex; |
| |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentUniChar == 0 ) { |
| componentLength = currentCharIndex - firstComponentCharIndex; |
| doneParsingComponent = true; |
| } |
| else { |
| switch ( currentUniChar ) { |
| case '/': |
| case '?': |
| case '#': |
| componentLength = currentCharIndex - firstComponentCharIndex; |
| doneParsingComponent = true; |
| break; |
| default: |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| break; |
| } |
| } |
| } |
| _ParseAuthority(&buf, firstComponentCharIndex, componentLength, parseInfo); |
| } |
| // else no authority |
| |
| // |
| // find path |
| // |
| |
| // there's always a path... it just might be zero length |
| parseInfo->pathOffset = currentCharIndex; |
| |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentUniChar == 0 ) { |
| doneParsingComponent = true; |
| } |
| else { |
| switch ( currentUniChar ) { |
| case '?': |
| case '#': |
| doneParsingComponent = true; |
| break; |
| case ';': |
| // keep track of the obsolete param subcomponent |
| if ( !(parseInfo->paramExists) ) { |
| parseInfo->paramExists = true; |
| parseInfo->paramOffset = currentCharIndex + 1; |
| } |
| // fall through to get next character |
| default: |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| break; |
| } |
| } |
| } |
| |
| // |
| // find query |
| // |
| if ( currentUniChar == '?' ) { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| |
| parseInfo->queryExists = true; |
| parseInfo->queryOffset = currentCharIndex; |
| |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentUniChar == 0 ) { |
| doneParsingComponent = true; |
| } |
| else { |
| switch ( currentUniChar ) { |
| case '#': |
| doneParsingComponent = true; |
| break; |
| default: |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| break; |
| } |
| } |
| } |
| } |
| // else no query |
| |
| // |
| // find fragment |
| // |
| if ( currentUniChar == '#' ) { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| |
| parseInfo->fragmentExists = true; |
| parseInfo->fragmentOffset = currentCharIndex; |
| |
| doneParsingComponent = false; |
| while ( !doneParsingComponent ) { |
| if ( currentUniChar == 0 ) { |
| doneParsingComponent = true; |
| } |
| else { |
| ++currentCharIndex; |
| currentUniChar = CFStringGetCharacterFromInlineBuffer(&buf, currentCharIndex); |
| } |
| } |
| } |
| // else no fragment |
| |
| return ( true ); |
| } |
| else { |
| return ( false ); |
| } |
| } |
| |
| |
| /* |
| * Returns the range of the scheme component. |
| * |
| * If includeSeparators is true, the characters that separate the scheme |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetSchemeRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->schemeExists ) { |
| if ( parseInfo->userinfoNameExists ) { |
| // end is userinfoNameOffset minus the '://' |
| result = CFRangeMake(0, parseInfo->userinfoNameOffset - (includeSeparators ? 0 : 3)); |
| } |
| else if ( parseInfo->userinfoPasswordExists ) { |
| // end is userinfoPasswordOffset minus the '://:' |
| result = CFRangeMake(0, parseInfo->userinfoPasswordOffset - (includeSeparators ? 1 : 4)); |
| } |
| else if ( parseInfo->hostExists ) { |
| // end is hostOffset minus the '://' |
| result = CFRangeMake(0, parseInfo->hostOffset - (includeSeparators ? 0 : 3)); |
| } |
| else if ( parseInfo->portExists ) { |
| // end is portOffset minus the '://:' |
| result = CFRangeMake(0, parseInfo->portOffset - (includeSeparators ? 1 : 4)); |
| } |
| else { |
| if ( parseInfo->authorityExists ) { |
| // end is pathOffset minus the '://' |
| result = CFRangeMake(0, parseInfo->pathOffset - (includeSeparators ? 0 : 3)); |
| } |
| else { |
| // end is pathOffset minus the ':' |
| result = CFRangeMake(0, parseInfo->pathOffset - (includeSeparators ? 0 : 1)); |
| } |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| #if 0 // unused but might be needed in the future |
| |
| /* |
| * Returns the range of the authority component. |
| * |
| * If includeSeparators is true, the characters that separate the authority |
| * from other components/subcomponents are included. |
| */ |
| static CFRange _CFURIParserGetAuthorityRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->authorityExists ) { |
| if ( parseInfo->userinfoNameExists ) { |
| // authority begins at userinfoNameOffset |
| // end is pathOffset |
| result = CFRangeMake(parseInfo->userinfoNameOffset, parseInfo->pathOffset - parseInfo->userinfoNameOffset); |
| } |
| else { |
| // authority begins at hostOffset |
| // end is pathOffset |
| result = CFRangeMake(parseInfo->hostOffset, parseInfo->pathOffset - parseInfo->hostOffset); |
| } |
| |
| if ( includeSeparators ) { |
| result.location -= 3; |
| result.length += 3; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| #endif // unused but might be needed in the future |
| |
| |
| /* |
| * Returns the range of the userinfoName part of the userinfo sub-component of |
| * the authority component. |
| * |
| * If includeSeparators is true, the characters that separate the userinfoName |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetUserinfoNameRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->userinfoNameExists ) { |
| if ( parseInfo->userinfoPasswordExists ) { |
| // end is userinfoPasswordOffset minus the ':' |
| result = CFRangeMake(parseInfo->userinfoNameOffset, parseInfo->userinfoPasswordOffset - parseInfo->userinfoNameOffset - 1); |
| } |
| else if ( parseInfo->hostExists ) { |
| // end is hostOffset minus the '@' |
| result = CFRangeMake(parseInfo->userinfoNameOffset, parseInfo->hostOffset - parseInfo->userinfoNameOffset - 1); |
| } |
| else if ( parseInfo->portExists ) { |
| // end is portOffset minus the '@:' |
| result = CFRangeMake(parseInfo->userinfoNameOffset, parseInfo->portOffset - parseInfo->userinfoNameOffset - 2); |
| } |
| else { |
| // end is pathOffset minus the '@' |
| result = CFRangeMake(parseInfo->userinfoNameOffset, parseInfo->pathOffset - parseInfo->userinfoNameOffset - 1); |
| } |
| |
| if ( includeSeparators ) { |
| result.location -= 3; |
| result.length += 4; // either ends at ':' password delimiter, or at '@' host delimiter |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the userinfoPassword part of the userinfo sub-component |
| * of the authority component. |
| * |
| * If includeSeparators is true, the characters that separate the userinfoPassword |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetUserinfoPasswordRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->userinfoPasswordExists ) { |
| if ( parseInfo->hostExists ) { |
| // end is hostOffset minus the '@' |
| result = CFRangeMake(parseInfo->userinfoPasswordOffset, parseInfo->hostOffset - parseInfo->userinfoPasswordOffset - 1); |
| } |
| else if ( parseInfo->portExists ) { |
| // end is portOffset minus the '@:' |
| result = CFRangeMake(parseInfo->userinfoPasswordOffset, parseInfo->portOffset - parseInfo->userinfoPasswordOffset - 2); |
| } |
| else { |
| // end is pathOffset minus the '@' |
| result = CFRangeMake(parseInfo->userinfoPasswordOffset, parseInfo->pathOffset - parseInfo->userinfoPasswordOffset - 1); |
| } |
| |
| if ( includeSeparators ) { |
| result.location--; |
| result.length += 2; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the host sub-component of the authority component. |
| * |
| * If includeSeparators is true, the characters that separate the host |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetHostRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->hostExists ) { |
| if ( parseInfo->portExists ) { |
| // end is portOffset minus the ':' |
| result = CFRangeMake(parseInfo->hostOffset, parseInfo->portOffset - parseInfo->hostOffset - 1); |
| } |
| else { |
| // end is pathOffset |
| result = CFRangeMake(parseInfo->hostOffset, parseInfo->pathOffset - parseInfo->hostOffset); |
| } |
| |
| if ( includeSeparators ) { |
| if ( parseInfo->userinfoNameExists ) { |
| result.location--; |
| result.length++; |
| } |
| else { |
| result.location -= 3; |
| result.length += 3; |
| } |
| if ( parseInfo->portExists ) { |
| result.length++; |
| } |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the port sub-component of the authority component. |
| * |
| * If includeSeparators is true, the characters that separate the port |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetPortRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->portExists ) { |
| // end is pathOffset |
| result = CFRangeMake(parseInfo->portOffset, parseInfo->pathOffset - parseInfo->portOffset); |
| if ( includeSeparators ) { |
| result.location--; |
| result.length++; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the path component. |
| * |
| * If includeSeparators is true, the characters that separate the path |
| * from other components/subcomponents are included. |
| * |
| * If minusParam is false, the path component is the rfc3986 path. If minusParam |
| * is true, the path component ends at the first ';' character and the rest of |
| * the rfc3986 path after ';' is considered the obsolete rfc1808 param component. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetPathRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators, Boolean minusParam) |
| { |
| CFRange result; |
| |
| if ( minusParam && parseInfo->paramExists ) { |
| // end is paramOffset minus the ';' |
| result = CFRangeMake(parseInfo->pathOffset, parseInfo->paramOffset - parseInfo->pathOffset - (includeSeparators ? 0 : 1)); |
| } |
| else if ( parseInfo->queryExists ) { |
| // end is queryOffset minus the '?' |
| result = CFRangeMake(parseInfo->pathOffset, parseInfo->queryOffset - parseInfo->pathOffset - (includeSeparators ? 0 : 1)); |
| } |
| else if ( parseInfo->fragmentExists ) { |
| // end fragmentOffset is minus the '#' |
| result = CFRangeMake(parseInfo->pathOffset, parseInfo->fragmentOffset - parseInfo->pathOffset - (includeSeparators ? 0 : 1)); |
| } |
| else { |
| // end is endOffset |
| result = CFRangeMake(parseInfo->pathOffset, parseInfo->endOffset - parseInfo->pathOffset); |
| } |
| return ( result ); |
| } |
| |
| |
| #if 0 // unused but might be needed in the future |
| |
| /* |
| * Returns the range of the obsolete rfc1808 param component. |
| * |
| * If includeSeparators is true, the characters that separate the param |
| * from other components/subcomponents are included. |
| */ |
| static CFRange _CFURIParserGetParamRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->paramExists ) { |
| if ( parseInfo->queryExists ) { |
| // end is queryOffset minus the '?' |
| result = CFRangeMake(parseInfo->paramOffset, parseInfo->queryOffset - parseInfo->paramOffset - 1); |
| } |
| else if ( parseInfo->fragmentExists ) { |
| // end fragmentOffset is minus the '#' |
| result = CFRangeMake(parseInfo->paramOffset, parseInfo->fragmentOffset - parseInfo->paramOffset - 1); |
| } |
| else { |
| // end is endOffset |
| result = CFRangeMake(parseInfo->paramOffset, parseInfo->endOffset - parseInfo->paramOffset); |
| } |
| |
| if ( includeSeparators ) { |
| result.location--; |
| result.length += (parseInfo->queryExists || parseInfo->fragmentExists) ? 2 : 1; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the obsolete resource specifier component. |
| * |
| * If includeSeparators is true, the characters that separate the resource specifier |
| * from other components/subcomponents are included. |
| */ |
| static CFRange _CFURIParserGetResourceSpecifierRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->paramExists ) { |
| // start is paramOffset; end is endOffset |
| result = CFRangeMake(parseInfo->paramOffset, parseInfo->endOffset - parseInfo->paramOffset); |
| } |
| else if ( parseInfo->queryExists ) { |
| // start is queryOffset; end is endOffset |
| result = CFRangeMake(parseInfo->queryOffset, parseInfo->endOffset - parseInfo->queryOffset); |
| } |
| else if ( parseInfo->fragmentExists ) { |
| // start is fragmentOffset; end is endOffset |
| result = CFRangeMake(parseInfo->fragmentOffset, parseInfo->endOffset - parseInfo->fragmentOffset); |
| } |
| else if ( !parseInfo->authorityExists ) { |
| // start is pathOffset; end is endOffset |
| result = CFRangeMake(parseInfo->pathOffset, parseInfo->endOffset - parseInfo->pathOffset); |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| |
| if ( includeSeparators && (result.location != kCFNotFound) ) { |
| result.location--; |
| result.length++; |
| } |
| return ( result ); |
| } |
| |
| #endif // unused but might be needed in the future |
| |
| |
| /* |
| * Returns the range of the query component. |
| * |
| * If includeSeparators is true, the characters that separate the query |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetQueryRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->queryExists ) { |
| if ( parseInfo->fragmentExists ) { |
| // end is fragmentOffset minus the '#' |
| result = CFRangeMake(parseInfo->queryOffset, parseInfo->fragmentOffset - parseInfo->queryOffset - 1); |
| } |
| else { |
| // end is endOffset |
| result = CFRangeMake(parseInfo->queryOffset, parseInfo->endOffset - parseInfo->queryOffset); |
| } |
| |
| if ( includeSeparators ) { |
| result.location--; |
| result.length += parseInfo->fragmentExists ? 2 : 1; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| |
| /* |
| * Returns the range of the fragment component. |
| * |
| * If includeSeparators is true, the characters that separate the fragment |
| * from other components/subcomponents are included. |
| */ |
| CF_PRIVATE CFRange _CFURIParserGetFragmentRange(const struct _URIParseInfo *parseInfo, Boolean includeSeparators) |
| { |
| CFRange result; |
| |
| if ( parseInfo->fragmentExists ) { |
| // end is endOffset |
| result = CFRangeMake(parseInfo->fragmentOffset, parseInfo->endOffset - parseInfo->fragmentOffset); |
| |
| if ( includeSeparators ) { |
| result.location--; |
| result.length++; |
| } |
| } |
| else { |
| result = CFRangeMake(kCFNotFound, 0); |
| } |
| return ( result ); |
| } |
| |
| CF_PRIVATE Boolean _CFURIParserAlphaAllowed(UniChar ch) { |
| return ((sURLAllowedCharacters[ch] & kURLAlphaAllowed) != 0); |
| } |
| |
| CF_PRIVATE Boolean _CFURIParserValidateComponent(CFStringRef urlString, CFRange componentRange, _CFURIParserURLAllowedCharacter allowedMask, Boolean pctEncodedAllowed) |
| { |
| Boolean result = true; |
| CFStringInlineBuffer buf; |
| unsigned long checkHexDigit = 0; |
| if ( (componentRange.location != kCFNotFound) && (componentRange.length > 0) ) { |
| // we've already checked componentRange.location for kCFNotFound and componentRange.length is always less than 2GB so this cast to a CFRange is safe |
| CFStringInitInlineBuffer(urlString, &buf, *(CFRange*)&componentRange); |
| for (CFIndex idx = 0; idx < componentRange.length; ++idx ) { |
| UniChar ch = __CFStringGetCharacterFromInlineBufferQuick(&buf, idx); |
| if ( !checkHexDigit ) { |
| if ( pctEncodedAllowed && (ch == '%') ) { |
| // percent encoded? make sure there at least 2 characters left to check |
| if ( (idx + 2) < componentRange.length ) { |
| // the next 2 characters must be HEXDIG |
| checkHexDigit = 2; |
| continue; |
| } |
| else { |
| result = false; |
| break; |
| } |
| } |
| else if ( (ch > 127) || ((sURLAllowedCharacters[ch] & allowedMask) == 0) ) { |
| if ( (allowedMask == kURLHostAllowed) && (ch == ':') ) { |
| continue; |
| } |
| result = false; |
| break; |
| } |
| } |
| else { |
| if ( (ch <= 127) && ((sURLAllowedCharacters[ch] & kURLHexDigAllowed) != 0) ) { |
| --checkHexDigit; |
| continue; |
| } |
| else { |
| result = false; |
| break; |
| } |
| } |
| } |
| } |
| return ( result ); |
| } |
| |
| CF_PRIVATE Boolean _CFURIParserURLStringIsValid(CFStringRef urlString, struct _URIParseInfo *parseInfo) |
| { |
| Boolean result = true; |
| CFRange componentRange; |
| |
| // scheme range is already valid from ParseURIReference() |
| |
| // validate the user |
| componentRange = _CFURIParserGetUserinfoNameRange(parseInfo, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLUserAllowed, true); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the password |
| componentRange = _CFURIParserGetUserinfoPasswordRange(parseInfo, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLPasswordAllowed, true); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the host |
| componentRange = _CFURIParserGetHostRange(parseInfo, false); |
| if ( (componentRange.location != kCFNotFound) && (componentRange.length >= 2) && (CFStringGetCharacterAtIndex(urlString, componentRange.location) == '[') && (CFStringGetCharacterAtIndex(urlString, componentRange.location + componentRange.length - 1) == ']') ) { |
| // the host is an IP-Literal -- only validate the characters inside brackets |
| ++componentRange.location; |
| componentRange.length -= 2; |
| } |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLHostAllowed, true); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the port |
| componentRange = _CFURIParserGetPortRange(parseInfo, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLPortAllowed, false); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the path |
| componentRange = _CFURIParserGetPathRange(parseInfo, false, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLPathAllowed, true); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the query |
| componentRange = _CFURIParserGetQueryRange(parseInfo, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLQueryAllowed, true); |
| if ( !result ) goto invalidComponent; |
| |
| // validate the fragment |
| componentRange = _CFURIParserGetFragmentRange(parseInfo, false); |
| result = _CFURIParserValidateComponent(urlString, componentRange, kURLFragmentAllowed, true); |
| |
| invalidComponent: |
| return ( result ); |
| } |