CoreFoundation/URL.subproj/CFURL.inc.h - third_party/swift-corelibs-foundation - Git at Google

 /*	CFURL.inc.h
 	Copyright (c) 2012-2016, Apple Inc. and the Swift project authors

 	Portions Copyright (c) 2014-2016 Apple Inc. and the Swift project authors
 	Licensed under Apache License v2.0 with Runtime Library Exception
 	See http://swift.org/LICENSE.txt for license information
 	See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 	Responsibility: Jim Luther/Chris Linn
 */


 /*

  What's this file for?

  CFURL's URL string parser needs to be able to parse either an array of char or an array of UniChar.

  The code in CFURL.c used to use this macro "#define STRING_CHAR(x) (useCString ? cstring[(x)] : ustring[(x)])" to determine which array to get a character from for every character looked at in the URL string. That macro added one or more compare and branch instructions to the parser's execution for *every* character in the URL string. Those extra compares and branches added up to 10% of the time (for long URL strings) it takes to create a URL object.

  To ensure the exact same parser code is run over a char or a UniChar string, the source code was moved to this .h file and is included multiple times by CFURL.c as needed. "STRING_CHAR(x)" was replaced by "characterArray[x]", and characterArray is defined as either a "const char *" or a "const UniChar *" for the two sets of function headers that are either parsing an array of char or an array of UniChar.

  Any changes made to the parser are made in this file so that both char and the UniChar strings are parsed exactly the same way.

  */

 /*
     static void _parseComponentsCString(CFAllocatorRef alloc, CFURLRef baseURL, CFIndex cfStringLength, const char *characterArray, UInt32 *theFlags, CFRange *packedRanges, uint8_t *numberOfRanges)
  or
     static void _parseComponentsUString(CFAllocatorRef alloc, CFURLRef baseURL, CFIndex cfStringLength, const UniChar *characterArray, UInt32 *theFlags, CFRange *packedRanges, uint8_t *numberOfRanges)
  */
 #ifdef CFURL_INCLUDE_PARSE_COMPONENTS // defined when we want this block of code included
 /*
  Shared function body for _parseComponentsCString and _parseComponentsUString; the function
  header (see the comment above) is supplied by the file that includes this one.

  Splits the URL string held in characterArray[0 .. cfStringLength) into component ranges.

  Reads:   cfStringLength, characterArray, the incoming value of *theFlags, and baseURL
           (baseURL is only consulted when the path substring turns out to be empty).
           alloc is not referenced by this body.
  Writes:  *theFlags, packedRanges[] and *numberOfRanges.

  Two outcomes:
    - A scheme was found but what follows it is missing or does not start with '/': the URL is
      treated as not decomposable. Only the scheme range is stored (packedRanges[0]),
      *numberOfRanges is set to 1, and a HAS_FRAGMENT bit set during step 1 is cleared again.
    - Otherwise IS_DECOMPOSABLE is set, the remaining components are parsed, and the ranges of
      the components that exist are copied into packedRanges in flag-bit order.
  */
 {
     /* index gives the URL part involved; to calculate the correct range index, use the number of the bit of the equivalent flag (i.e. the host flag is HAS_HOST, which is 0x8.  so the range index for the host is 3.)  Note that this is true in this function ONLY, since the ranges stored in (*range) are actually packed, skipping those URL components that don't exist.  This is why the indices are hard-coded in this function. */
    enum {
         scheme_index        = 0,
         user_index          = 1,
         password_index      = 2,
         host_index          = 3,
         port_index          = 4,
         path_index          = 5,
         parameters_index    = 6,
         query_index         = 7,
         fragment_index      = 8,
     };
     // One slot per possible component, indexed by the enum above; zero-initialized so absent components have {0, 0}.
     CFRange unpackedRanges[MAX_COMPONENTS] = {{0}};

     // idx is the scan cursor; base_idx is the start of the part of the string that has not been assigned to a component yet.
     CFIndex idx, base_idx = 0;
     // string_length is the (exclusive) end of the part still being parsed; it shrinks as trailing components are split off.
     CFIndex string_length;
     UInt32 flags = *theFlags;
     Boolean isCompliant;
     uint8_t numRanges = 0;

     string_length = cfStringLength;

     // Algorithm is as described in RFC 1808
     // 1: parse the fragment; remainder after left-most "#" is fragment
     for (idx = base_idx; idx < string_length; idx++) {
         if ('#' == characterArray[idx]) {
             flags |= HAS_FRAGMENT;
             unpackedRanges[fragment_index].location = idx + 1;
             unpackedRanges[fragment_index].length = string_length - (idx + 1);
             numRanges ++;
             string_length = idx;	// remove fragment from parse string
             break;
         }
     }
     // 2: parse the scheme
     // Scan until the first ':' (scheme found) or the first character rejected by scheme_valid() (no scheme).
     for (idx = base_idx; idx < string_length; idx++) {
         UniChar ch = characterArray[idx];
         if (':' == ch) {
             flags |= HAS_SCHEME;
             unpackedRanges[scheme_index].location = base_idx;
             // base_idx is still 0 at this point, so idx is also the scheme's length
             unpackedRanges[scheme_index].length = idx;
             numRanges ++;
             base_idx = idx + 1;
             // The comparisons below are case-sensitive: only all-lowercase scheme names are recognized here.
             // optimization for ftp urls
             if (idx == 3 && characterArray[0] == 'f' && characterArray[1] == 't' && characterArray[2] == 'p') {
                 _setSchemeTypeInFlags(&flags, kHasFtpScheme);
             }
             else if (idx == 4) {
                 // optimization for http urls
                 if (characterArray[0] == 'h' && characterArray[1] == 't' && characterArray[2] == 't' && characterArray[3] == 'p') {
                     _setSchemeTypeInFlags(&flags, kHasHttpScheme);
                 }
                 // optimization for file urls
                 if (characterArray[0] == 'f' && characterArray[1] == 'i' && characterArray[2] == 'l' && characterArray[3] == 'e') {
                     _setSchemeTypeInFlags(&flags, kHasFileScheme);
                 }
                 // optimization for data urls
                 if (characterArray[0] == 'd' && characterArray[1] == 'a' && characterArray[2] == 't' && characterArray[3] == 'a') {
                     _setSchemeTypeInFlags(&flags, kHasDataScheme);
                 }
             }
             // optimization for https urls
             else if (idx == 5 && characterArray[0] == 'h' && characterArray[1] == 't' && characterArray[2] == 't' && characterArray[3] == 'p' && characterArray[4] == 's') {
                 _setSchemeTypeInFlags(&flags, kHasHttpsScheme);
             }
             break;
         } else if (!scheme_valid(ch)) {
             break;	// invalid scheme character -- no scheme
         }
     }

     // Make sure we have an RFC-1808 compliant URL - that's either something without a scheme, or scheme:/(stuff) or scheme://(stuff)
     // Strictly speaking, RFC 1808 & 2396 bar "scheme:" (with nothing following the colon); however, common usage
     // expects this to be treated identically to "scheme://" - REW, 12/08/03
     // NOTE(review): the code below marks a bare "scheme:" (nothing left after the colon once the fragment
     // has been removed) as NOT compliant, which does not obviously match the sentence above -- confirm intent.
     if (!(flags & HAS_SCHEME)) {
         isCompliant = true;
     } else if (base_idx == string_length) {
         isCompliant = false;
     } else if (characterArray[base_idx] != '/') {
         isCompliant = false;
     } else {
         isCompliant = true;
     }

     if (!isCompliant) {
         // Not decomposable: report the scheme range only. numRanges is deliberately ignored on this path.
         // Clear the fragment flag if it's been set
         if (flags & HAS_FRAGMENT) {
             flags &= (~HAS_FRAGMENT);
             // undo the truncation from step 1 (string_length is not read again on this path)
             string_length = cfStringLength;
         }
         (*theFlags) = flags;
         packedRanges[scheme_index].location = unpackedRanges[scheme_index].location;
         packedRanges[scheme_index].length = unpackedRanges[scheme_index].length;
         *numberOfRanges = 1;
     }
     else {
         // URL is 1808-compliant
         flags |= IS_DECOMPOSABLE;

         // 3: parse the network location and login
         // A net location is only present when the remaining string starts with "//".
         if (2 <= (string_length - base_idx) && '/' == characterArray[base_idx] && '/' == characterArray[base_idx+1]) {
             CFIndex base = 2 + base_idx, extent;
             // The net location runs up to the first '/' or '?' (or to the end of the parse string).
             for (idx = base; idx < string_length; idx++) {
                 if ('/' == characterArray[idx] || '?' == characterArray[idx]) {
                     break;
                 }
             }
             extent = idx;

             // net_loc parts extend from base to extent (but not including), which might be to end of string
             // net location is "<user>:<password>@<host>:<port>"
             // An empty net location (e.g. "scheme:///path") sets none of the user/password/host/port flags.
             if (extent != base) {
                 // Look for the first '@'; if present, everything before it is "<user>" or "<user>:<password>".
                 for (idx = base; idx < extent; idx++) {
                     if ('@' == characterArray[idx]) {   // there is a user
                         CFIndex idx2;
                         flags |= HAS_USER;
                         numRanges ++;
                         unpackedRanges[user_index].location = base;  // base of the user
                         // The first ':' before the '@' separates user from password.
                         for (idx2 = base; idx2 < idx; idx2++) {
                             if (':' == characterArray[idx2]) {	// found a password separator
                                 flags |= HAS_PASSWORD;
                                 numRanges ++;
                                 unpackedRanges[password_index].location = idx2+1; // base of the password
                                 unpackedRanges[password_index].length = idx-(idx2+1);  // password extent
                                 unpackedRanges[user_index].length = idx2 - base; // user extent
                                 break;
                             }
                         }
                         if (!(flags & HAS_PASSWORD)) {
                             // user extends to the '@'
                             unpackedRanges[user_index].length = idx - base; // user extent
                         }
                         base = idx + 1;
                         break;
                     }
                 }
                 // A non-empty net location always yields a host (possibly of length 0, e.g. "user@").
                 flags |= HAS_HOST;
                 numRanges ++;
                 unpackedRanges[host_index].location = base; // base of host

                 // base has been advanced past the user and password if they existed
                 for (idx = base; idx < extent; idx++) {
                     // IPV6 support (RFC 2732) DCJ June/10/2002
                     if ('[' == characterArray[idx]) {	// starting IPV6 explicit address
                         //	Find the ']' terminator of the IPv6 address, leave idx pointing to ']' or end
                         // (colons inside the brackets are skipped this way and never taken for a port separator)
                         for ( ; idx < extent; ++ idx ) {
                             if ( ']' == characterArray[idx]) {
                                 flags |= IS_IPV6_ENCODED;
                                 break;
                             }
                         }
                     }
                     // there is a port if we see a colon.  Only the last one is the port, though.
                     // NOTE(review): this loop stops at the FIRST ':' found outside a '[...]' literal, so
                     // everything after that first colon up to extent is recorded as the port -- confirm intent.
                     else if ( ':' == characterArray[idx]) {
                         flags |= HAS_PORT;
                         numRanges ++;
                         unpackedRanges[port_index].location = idx+1; // base of port
                         unpackedRanges[port_index].length = extent - (idx+1); // port extent
                         unpackedRanges[host_index].length = idx - base; // host extent
                         break;
                     }
                 }
                 if (!(flags & HAS_PORT)) {
                     unpackedRanges[host_index].length = extent - base;  // host extent
                 }
             }
             // Everything up to extent has been consumed by the net location.
             base_idx = extent;
         }

         // 4: parse the query; remainder after left-most "?" is query
         for (idx = base_idx; idx < string_length; idx++) {
             if ('?' == characterArray[idx]) {
                 flags |= HAS_QUERY;
                 numRanges ++;
                 unpackedRanges[query_index].location = idx + 1;
                 unpackedRanges[query_index].length = string_length - (idx+1);
                 string_length = idx;	// remove query from parse string
                 break;
             }
         }

         // 5: parse the parameters; remainder after left-most ";" is parameters
         for (idx = base_idx; idx < string_length; idx++) {
             if (';' == characterArray[idx]) {
                 flags |= HAS_PARAMETERS;
                 numRanges ++;
                 unpackedRanges[parameters_index].location = idx + 1;
                 unpackedRanges[parameters_index].length = string_length - (idx+1);
                 string_length = idx;	// remove parameters from parse string
                 break;
             }
         }

         // 6: parse the path; it's whatever's left between string_length & base_idx
         if (string_length - base_idx != 0 || (flags & NET_LOCATION_MASK))
         {
             // If we have a net location, we are 1808-compliant, and an empty path substring implies a path of "/"
             UniChar ch;
             Boolean isDir;
             CFRange pathRg;
             flags |= HAS_PATH;
             numRanges ++;
             pathRg.location = base_idx;
             pathRg.length = string_length - base_idx;
             unpackedRanges[path_index] = pathRg;

             if (pathRg.length > 0) {
                 // Record whether the path contains any '%' at all (used for POSIX_AND_URL_PATHS_MATCH below).
                 Boolean sawPercent = FALSE;
                 for (idx = pathRg.location; idx < string_length; idx++) {
                     if ('%' == characterArray[idx]) {
                         sawPercent = TRUE;
                         break;
                     }
                 }
 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
                 // A path that starts with "/.file/" gets PATH_HAS_FILE_ID instead of POSIX_AND_URL_PATHS_MATCH.
                 if (pathRg.length > 6 && characterArray[pathRg.location] == '/' && characterArray[pathRg.location + 1] == '.' && characterArray[pathRg.location + 2] == 'f' && characterArray[pathRg.location + 3] == 'i' && characterArray[pathRg.location + 4] == 'l' && characterArray[pathRg.location + 5] == 'e' && characterArray[pathRg.location + 6] == '/') {
                     flags |= PATH_HAS_FILE_ID;
                 } else if (!sawPercent) {
                     flags |= POSIX_AND_URL_PATHS_MATCH;
                 }
 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_WINDOWS
                 if (!sawPercent) {
                     flags |= POSIX_AND_URL_PATHS_MATCH;
                 }
 #endif

                 // Directory test on the tail of the path: it is a directory when it ends in "/",
                 // or when it is exactly "." or "..", or when it ends in "/." or "/..".
                 ch = characterArray[pathRg.location + pathRg.length - 1];
                 if (ch == '/') {
                     isDir = true;
                 } else if (ch == '.') {
                     if (pathRg.length == 1) {
                         isDir = true;
                     } else {
                         ch = characterArray[pathRg.location + pathRg.length - 2];
                         if (ch == '/') {
                             isDir = true;
                         } else if (ch != '.') {
                             isDir = false;
                         } else if (pathRg.length == 2) {
                             isDir = true;
                         } else {
                             isDir = (characterArray[pathRg.location + pathRg.length - 3] == '/');
                         }
                     }
                 } else {
                     isDir = false;
                 }
             } else {
                 // Empty path substring: take the directory-ness of the base URL, if there is one.
                 isDir = (baseURL != NULL) ? CFURLHasDirectoryPath(baseURL) : false;
             }
             if (isDir) {
                 flags |= IS_DIRECTORY;
             }
         }

         (*theFlags) = flags;
         *numberOfRanges = numRanges;
         // Pack the ranges: walk flag bits 0..8 (flags is reused as the bit mask, numRanges as the output cursor)
         // and copy the range of every component whose bit is set in *theFlags, in bit order.
         numRanges = 0;
         for (idx = 0, flags = 1; flags != (1<<9); flags = (flags<<1), idx ++) {
             if ((*theFlags) & flags) {
                 packedRanges[numRanges] = unpackedRanges[idx];
                 numRanges ++;
             }
         }
     }
 }
 #endif  // CFURL_INCLUDE_PARSE_COMPONENTS

 /*
     static Boolean scanCharactersCString(CFAllocatorRef alloc, CFMutableStringRef *escapedString, UInt32 *flags, const char *characterArray, Boolean useCString, CFIndex base, CFIndex end, CFIndex *mark, UInt32 componentFlag, CFStringEncoding encoding)
     static Boolean scanCharactersUString(CFAllocatorRef alloc, CFMutableStringRef *escapedString, UInt32 *flags, const UniChar *characterArray, Boolean useCString, CFIndex base, CFIndex end, CFIndex *mark, UInt32 componentFlag, CFStringEncoding encoding)
  */
 #ifdef CFURL_INCLUDE_SCAN_CHARACTERS  // defined when we want this block of code included
 /*
  Shared function body for scanCharactersCString and scanCharactersUString; the function header
  (see the comment above) is supplied by the file that includes this one.

  Walks characterArray[base .. end) and percent-escapes every character that is not allowed to
  appear as-is in the component identified by componentFlag.

    - *mark is the index of the first character that has not been copied into *escapedString yet.
      Each time a character needs escaping, the pending run [*mark, idx) is appended verbatim,
      then the escaped form of the character, and *mark is moved past what was consumed.
    - *escapedString is created lazily (using alloc) the first time a character needs escaping;
      it is not touched when nothing needs escaping.
    - When flags is non-NULL and componentFlag is non-zero, componentFlag is OR-ed into *flags
      the first (and every) time a character needs escaping.
    - useCString selects how the pending run is appended: as ISO Latin-1 bytes or as UniChars.

  Returns true when at least one character needed escaping.

  The run [*mark, end) that is still pending on return is not appended here; presumably the
  caller appends it -- confirm against CFURL.c.
  */
 {
     CFIndex idx;
     Boolean sawIllegalChar = false;
     for (idx = base; idx < end; idx ++) {
         Boolean shouldEscape;
         // NOTE(review): when characterArray is "const char *" and plain char is signed, a byte >= 0x80
         // would sign-extend here -- confirm that the C-string variant is only ever fed 7-bit data.
         UniChar ch = characterArray[idx];
         if (isURLLegalCharacter(ch)) {
             // '/', '?' and '@' are legal in a URL but act as delimiters around the user/password part,
             // so they must be escaped when they occur inside those two components.
             if ((componentFlag == HAS_USER || componentFlag == HAS_PASSWORD) && (ch == '/' || ch == '?' || ch == '@')) {
                 shouldEscape = true;
             } else {
                 shouldEscape = false;
             }
         } else if (ch == '%' && idx + 2 < end && isHexDigit(characterArray[idx + 1]) && isHexDigit(characterArray[idx+2])) {
             // already a well-formed "%XX" escape: keep it
             shouldEscape = false;
         } else if (componentFlag == HAS_HOST && ((idx == base && ch == '[') || (idx == end-1 && ch == ']'))) {
             // brackets delimiting an IPv6 literal host are kept as-is
             shouldEscape = false;
         } else {
             shouldEscape = true;
         }
         if (shouldEscape) {
             Boolean didEscape;

             sawIllegalChar = true;
             if (componentFlag && flags) {
                 *flags |= componentFlag;
             }
             if (!*escapedString) {
                 *escapedString = CFStringCreateMutable(alloc, 0);
             }
             // Flush the pending, unescaped run [*mark, idx).
             if (useCString) {
                 CFStringRef tempString = CFStringCreateWithBytes(alloc, (const uint8_t *)&(characterArray[*mark]), idx - *mark, kCFStringEncodingISOLatin1, false);
                 CFStringAppend(*escapedString, tempString);
                 CFRelease(tempString);
             } else {
                 CFStringAppendCharacters(*escapedString, (const UniChar *)&(characterArray[*mark]), idx - *mark);
             }
             // try as a single UniChar first
             didEscape = _appendPercentEscapesForCharacter(&ch, false, encoding, *escapedString);
             if ( didEscape ) {
                 *mark = idx + 1;
             }
             // if that failed, the encoding is UTF8 and this is a surrogate pair, then percent-encode the surrogate pair
             else if ( (encoding == kCFStringEncodingUTF8) && ((idx + 1) < end) && CFCharacterSetIsSurrogateHighCharacter(ch) && CFCharacterSetIsSurrogateLowCharacter(characterArray[idx + 1]) ) {
                 // percent-encode the surrogate pair
                 UniChar surrogatePair[2];
                 surrogatePair[0] = ch;
                 surrogatePair[1] = characterArray[idx + 1];
                 didEscape = _appendPercentEscapesForCharacter(surrogatePair, true, encoding, *escapedString);
                 if ( didEscape ) {
                     // we consumed 2 characters instead of 1
                     *mark = idx + 2;
                     ++idx;
                 }
             }
             if ( !didEscape ) {
                 // The character could not be escaped. The run [*mark, idx) has already been appended
                 // above, so the mark must still advance to idx; otherwise that run would be appended
                 // a second time by the next flush. The unescapable character itself stays pending and
                 // is copied through verbatim with the next run.
                 *mark = idx;
             }
         }
     }
     return sawIllegalChar;
 }
 #endif  // CFURL_INCLUDE_SCAN_CHARACTERS
	/* CFURL.inc.h
	Copyright (c) 2012-2016, Apple Inc. and the Swift project authors

	Portions Copyright (c) 2014-2016 Apple Inc. and the Swift project authors
	Licensed under Apache License v2.0 with Runtime Library Exception
	See http://swift.org/LICENSE.txt for license information
	See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
	Responsibility: Jim Luther/Chris Linn
	*/


	/*

	What's this file for?

	CFURL's URL string parser needs to be able to parse either an array of char or an array of UniChar.

	The code in CFURL.c used to use this macro "#define STRING_CHAR(x) (useCString ? cstring[(x)] : ustring[(x)])" to determine which array to get a character from for every character looked at in the URL string. That macro added one or more compare and branch instructions to the parser's execution for every character in the URL string. Those extra compares and branches added up to 10% of the time (for long URL strings) it takes to create a URL object.

	To ensure the exact same parser code is run over a char or a UniChar string, the source code was moved to this .h file and is included multiple times by CFURL.c as needed. "STRING_CHAR(x)" was replaced by "characterArray[x]", and characterArray is defined as either a "const char *" or a "const UniChar *" for the two sets of function headers that are either parsing an array of char or an array of UniChar.

	Any changes made to the parser are made in this file so that both char and the UniChar strings are parsed exactly the same way.

	*/

	/*
	static void _parseComponentsCString(CFAllocatorRef alloc, CFURLRef baseURL, CFIndex cfStringLength, const char *characterArray, UInt32 *theFlags, CFRange *packedRanges, uint8_t *numberOfRanges)
	or
	static void _parseComponentsUString(CFAllocatorRef alloc, CFURLRef baseURL, CFIndex cfStringLength, const UniChar *characterArray, UInt32 *theFlags, CFRange *packedRanges, uint8_t *numberOfRanges)
	*/
	#ifdef CFURL_INCLUDE_PARSE_COMPONENTS // defined when we want this block of code included
	/*
	 Shared function body for _parseComponentsCString and _parseComponentsUString; the function
	 header is supplied by the file that includes this one.

	 Splits the URL string held in characterArray[0 .. cfStringLength) into component ranges.

	 Reads:   cfStringLength, characterArray, the incoming value of *theFlags, and baseURL
	          (baseURL is only consulted when the path substring turns out to be empty).
	          alloc is not referenced by this body.
	 Writes:  *theFlags, packedRanges[] and *numberOfRanges.

	 Two outcomes:
	   - A scheme was found but what follows it is missing or does not start with '/': the URL is
	     treated as not decomposable. Only the scheme range is stored (packedRanges[0]),
	     *numberOfRanges is set to 1, and a HAS_FRAGMENT bit set during step 1 is cleared again.
	   - Otherwise IS_DECOMPOSABLE is set, the remaining components are parsed, and the ranges of
	     the components that exist are copied into packedRanges in flag-bit order.
	 */
	{
	    /* index gives the URL part involved; to calculate the correct range index, use the number of the bit of the equivalent flag (i.e. the host flag is HAS_HOST, which is 0x8. so the range index for the host is 3.) Note that this is true in this function ONLY, since the ranges stored in (*range) are actually packed, skipping those URL components that don't exist. This is why the indices are hard-coded in this function. */
	    enum {
	        scheme_index        = 0,
	        user_index          = 1,
	        password_index      = 2,
	        host_index          = 3,
	        port_index          = 4,
	        path_index          = 5,
	        parameters_index    = 6,
	        query_index         = 7,
	        fragment_index      = 8,
	    };
	    // One slot per possible component, indexed by the enum above; zero-initialized so absent components have {0, 0}.
	    CFRange unpackedRanges[MAX_COMPONENTS] = {{0}};

	    // idx is the scan cursor; base_idx is the start of the part of the string not yet assigned to a component.
	    CFIndex idx, base_idx = 0;
	    // string_length is the (exclusive) end of the part still being parsed; it shrinks as trailing components are split off.
	    CFIndex string_length;
	    UInt32 flags = *theFlags;
	    Boolean isCompliant;
	    uint8_t numRanges = 0;

	    string_length = cfStringLength;

	    // Algorithm is as described in RFC 1808
	    // 1: parse the fragment; remainder after left-most "#" is fragment
	    for (idx = base_idx; idx < string_length; idx++) {
	        if ('#' == characterArray[idx]) {
	            flags |= HAS_FRAGMENT;
	            unpackedRanges[fragment_index].location = idx + 1;
	            unpackedRanges[fragment_index].length = string_length - (idx + 1);
	            numRanges ++;
	            string_length = idx; // remove fragment from parse string
	            break;
	        }
	    }
	    // 2: parse the scheme
	    // Scan until the first ':' (scheme found) or the first character rejected by scheme_valid() (no scheme).
	    for (idx = base_idx; idx < string_length; idx++) {
	        UniChar ch = characterArray[idx];
	        if (':' == ch) {
	            flags |= HAS_SCHEME;
	            unpackedRanges[scheme_index].location = base_idx;
	            // base_idx is still 0 at this point, so idx is also the scheme's length
	            unpackedRanges[scheme_index].length = idx;
	            numRanges ++;
	            base_idx = idx + 1;
	            // The comparisons below are case-sensitive: only all-lowercase scheme names are recognized here.
	            // optimization for ftp urls
	            if (idx == 3 && characterArray[0] == 'f' && characterArray[1] == 't' && characterArray[2] == 'p') {
	                _setSchemeTypeInFlags(&flags, kHasFtpScheme);
	            }
	            else if (idx == 4) {
	                // optimization for http urls
	                if (characterArray[0] == 'h' && characterArray[1] == 't' && characterArray[2] == 't' && characterArray[3] == 'p') {
	                    _setSchemeTypeInFlags(&flags, kHasHttpScheme);
	                }
	                // optimization for file urls
	                if (characterArray[0] == 'f' && characterArray[1] == 'i' && characterArray[2] == 'l' && characterArray[3] == 'e') {
	                    _setSchemeTypeInFlags(&flags, kHasFileScheme);
	                }
	                // optimization for data urls
	                if (characterArray[0] == 'd' && characterArray[1] == 'a' && characterArray[2] == 't' && characterArray[3] == 'a') {
	                    _setSchemeTypeInFlags(&flags, kHasDataScheme);
	                }
	            }
	            // optimization for https urls
	            else if (idx == 5 && characterArray[0] == 'h' && characterArray[1] == 't' && characterArray[2] == 't' && characterArray[3] == 'p' && characterArray[4] == 's') {
	                _setSchemeTypeInFlags(&flags, kHasHttpsScheme);
	            }
	            break;
	        } else if (!scheme_valid(ch)) {
	            break; // invalid scheme character -- no scheme
	        }
	    }

	    // Make sure we have an RFC-1808 compliant URL - that's either something without a scheme, or scheme:/(stuff) or scheme://(stuff)
	    // Strictly speaking, RFC 1808 & 2396 bar "scheme:" (with nothing following the colon); however, common usage
	    // expects this to be treated identically to "scheme://" - REW, 12/08/03
	    // NOTE(review): the code below marks a bare "scheme:" (nothing left after the colon once the fragment
	    // has been removed) as NOT compliant, which does not obviously match the sentence above -- confirm intent.
	    if (!(flags & HAS_SCHEME)) {
	        isCompliant = true;
	    } else if (base_idx == string_length) {
	        isCompliant = false;
	    } else if (characterArray[base_idx] != '/') {
	        isCompliant = false;
	    } else {
	        isCompliant = true;
	    }

	    if (!isCompliant) {
	        // Not decomposable: report the scheme range only. numRanges is deliberately ignored on this path.
	        // Clear the fragment flag if it's been set
	        if (flags & HAS_FRAGMENT) {
	            flags &= (~HAS_FRAGMENT);
	            // undo the truncation from step 1 (string_length is not read again on this path)
	            string_length = cfStringLength;
	        }
	        (*theFlags) = flags;
	        packedRanges[scheme_index].location = unpackedRanges[scheme_index].location;
	        packedRanges[scheme_index].length = unpackedRanges[scheme_index].length;
	        *numberOfRanges = 1;
	    }
	    else {
	        // URL is 1808-compliant
	        flags |= IS_DECOMPOSABLE;

	        // 3: parse the network location and login
	        // A net location is only present when the remaining string starts with "//".
	        if (2 <= (string_length - base_idx) && '/' == characterArray[base_idx] && '/' == characterArray[base_idx+1]) {
	            CFIndex base = 2 + base_idx, extent;
	            // The net location runs up to the first '/' or '?' (or to the end of the parse string).
	            for (idx = base; idx < string_length; idx++) {
	                if ('/' == characterArray[idx] || '?' == characterArray[idx]) {
	                    break;
	                }
	            }
	            extent = idx;

	            // net_loc parts extend from base to extent (but not including), which might be to end of string
	            // net location is "<user>:<password>@<host>:<port>"
	            // An empty net location (e.g. "scheme:///path") sets none of the user/password/host/port flags.
	            if (extent != base) {
	                // Look for the first '@'; if present, everything before it is "<user>" or "<user>:<password>".
	                for (idx = base; idx < extent; idx++) {
	                    if ('@' == characterArray[idx]) { // there is a user
	                        CFIndex idx2;
	                        flags |= HAS_USER;
	                        numRanges ++;
	                        unpackedRanges[user_index].location = base; // base of the user
	                        // The first ':' before the '@' separates user from password.
	                        for (idx2 = base; idx2 < idx; idx2++) {
	                            if (':' == characterArray[idx2]) { // found a password separator
	                                flags |= HAS_PASSWORD;
	                                numRanges ++;
	                                unpackedRanges[password_index].location = idx2+1; // base of the password
	                                unpackedRanges[password_index].length = idx-(idx2+1); // password extent
	                                unpackedRanges[user_index].length = idx2 - base; // user extent
	                                break;
	                            }
	                        }
	                        if (!(flags & HAS_PASSWORD)) {
	                            // user extends to the '@'
	                            unpackedRanges[user_index].length = idx - base; // user extent
	                        }
	                        base = idx + 1;
	                        break;
	                    }
	                }
	                // A non-empty net location always yields a host (possibly of length 0, e.g. "user@").
	                flags |= HAS_HOST;
	                numRanges ++;
	                unpackedRanges[host_index].location = base; // base of host

	                // base has been advanced past the user and password if they existed
	                for (idx = base; idx < extent; idx++) {
	                    // IPV6 support (RFC 2732) DCJ June/10/2002
	                    if ('[' == characterArray[idx]) { // starting IPV6 explicit address
	                        // Find the ']' terminator of the IPv6 address, leave idx pointing to ']' or end
	                        // (colons inside the brackets are skipped this way and never taken for a port separator)
	                        for ( ; idx < extent; ++ idx ) {
	                            if ( ']' == characterArray[idx]) {
	                                flags |= IS_IPV6_ENCODED;
	                                break;
	                            }
	                        }
	                    }
	                    // there is a port if we see a colon. Only the last one is the port, though.
	                    // NOTE(review): this loop stops at the FIRST ':' found outside a '[...]' literal, so
	                    // everything after that first colon up to extent is recorded as the port -- confirm intent.
	                    else if ( ':' == characterArray[idx]) {
	                        flags |= HAS_PORT;
	                        numRanges ++;
	                        unpackedRanges[port_index].location = idx+1; // base of port
	                        unpackedRanges[port_index].length = extent - (idx+1); // port extent
	                        unpackedRanges[host_index].length = idx - base; // host extent
	                        break;
	                    }
	                }
	                if (!(flags & HAS_PORT)) {
	                    unpackedRanges[host_index].length = extent - base; // host extent
	                }
	            }
	            // Everything up to extent has been consumed by the net location.
	            base_idx = extent;
	        }

	        // 4: parse the query; remainder after left-most "?" is query
	        for (idx = base_idx; idx < string_length; idx++) {
	            if ('?' == characterArray[idx]) {
	                flags |= HAS_QUERY;
	                numRanges ++;
	                unpackedRanges[query_index].location = idx + 1;
	                unpackedRanges[query_index].length = string_length - (idx+1);
	                string_length = idx; // remove query from parse string
	                break;
	            }
	        }

	        // 5: parse the parameters; remainder after left-most ";" is parameters
	        for (idx = base_idx; idx < string_length; idx++) {
	            if (';' == characterArray[idx]) {
	                flags |= HAS_PARAMETERS;
	                numRanges ++;
	                unpackedRanges[parameters_index].location = idx + 1;
	                unpackedRanges[parameters_index].length = string_length - (idx+1);
	                string_length = idx; // remove parameters from parse string
	                break;
	            }
	        }

	        // 6: parse the path; it's whatever's left between string_length & base_idx
	        if (string_length - base_idx != 0 || (flags & NET_LOCATION_MASK))
	        {
	            // If we have a net location, we are 1808-compliant, and an empty path substring implies a path of "/"
	            UniChar ch;
	            Boolean isDir;
	            CFRange pathRg;
	            flags |= HAS_PATH;
	            numRanges ++;
	            pathRg.location = base_idx;
	            pathRg.length = string_length - base_idx;
	            unpackedRanges[path_index] = pathRg;

	            if (pathRg.length > 0) {
	                // Record whether the path contains any '%' at all (used for POSIX_AND_URL_PATHS_MATCH below).
	                Boolean sawPercent = FALSE;
	                for (idx = pathRg.location; idx < string_length; idx++) {
	                    if ('%' == characterArray[idx]) {
	                        sawPercent = TRUE;
	                        break;
	                    }
	                }
	#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
	                // A path that starts with "/.file/" gets PATH_HAS_FILE_ID instead of POSIX_AND_URL_PATHS_MATCH.
	                if (pathRg.length > 6 && characterArray[pathRg.location] == '/' && characterArray[pathRg.location + 1] == '.' && characterArray[pathRg.location + 2] == 'f' && characterArray[pathRg.location + 3] == 'i' && characterArray[pathRg.location + 4] == 'l' && characterArray[pathRg.location + 5] == 'e' && characterArray[pathRg.location + 6] == '/') {
	                    flags |= PATH_HAS_FILE_ID;
	                } else if (!sawPercent) {
	                    flags |= POSIX_AND_URL_PATHS_MATCH;
	                }
	#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_WINDOWS
	                if (!sawPercent) {
	                    flags |= POSIX_AND_URL_PATHS_MATCH;
	                }
	#endif

	                // Directory test on the tail of the path: it is a directory when it ends in "/",
	                // or when it is exactly "." or "..", or when it ends in "/." or "/..".
	                ch = characterArray[pathRg.location + pathRg.length - 1];
	                if (ch == '/') {
	                    isDir = true;
	                } else if (ch == '.') {
	                    if (pathRg.length == 1) {
	                        isDir = true;
	                    } else {
	                        ch = characterArray[pathRg.location + pathRg.length - 2];
	                        if (ch == '/') {
	                            isDir = true;
	                        } else if (ch != '.') {
	                            isDir = false;
	                        } else if (pathRg.length == 2) {
	                            isDir = true;
	                        } else {
	                            isDir = (characterArray[pathRg.location + pathRg.length - 3] == '/');
	                        }
	                    }
	                } else {
	                    isDir = false;
	                }
	            } else {
	                // Empty path substring: take the directory-ness of the base URL, if there is one.
	                isDir = (baseURL != NULL) ? CFURLHasDirectoryPath(baseURL) : false;
	            }
	            if (isDir) {
	                flags |= IS_DIRECTORY;
	            }
	        }

	        (*theFlags) = flags;
	        *numberOfRanges = numRanges;
	        // Pack the ranges: walk flag bits 0..8 (flags is reused as the bit mask, numRanges as the output cursor)
	        // and copy the range of every component whose bit is set in *theFlags, in bit order.
	        numRanges = 0;
	        for (idx = 0, flags = 1; flags != (1<<9); flags = (flags<<1), idx ++) {
	            if ((*theFlags) & flags) {
	                packedRanges[numRanges] = unpackedRanges[idx];
	                numRanges ++;
	            }
	        }
	    }
	}
	#endif // CFURL_INCLUDE_PARSE_COMPONENTS

	/*
	static Boolean scanCharactersCString(CFAllocatorRef alloc, CFMutableStringRef *escapedString, UInt32 *flags, const char *characterArray, Boolean useCString, CFIndex base, CFIndex end, CFIndex *mark, UInt32 componentFlag, CFStringEncoding encoding)
	static Boolean scanCharactersUString(CFAllocatorRef alloc, CFMutableStringRef *escapedString, UInt32 *flags, const UniChar *characterArray, Boolean useCString, CFIndex base, CFIndex end, CFIndex *mark, UInt32 componentFlag, CFStringEncoding encoding)
	*/
	#ifdef CFURL_INCLUDE_SCAN_CHARACTERS // defined when we want this block of code included
	/*
	 Shared function body for scanCharactersCString and scanCharactersUString; the function header
	 is supplied by the file that includes this one.

	 Walks characterArray[base .. end) and percent-escapes every character that is not allowed to
	 appear as-is in the component identified by componentFlag.

	   - *mark is the index of the first character that has not been copied into *escapedString yet.
	     Each time a character needs escaping, the pending run [*mark, idx) is appended verbatim,
	     then the escaped form of the character, and *mark is moved past what was consumed.
	   - *escapedString is created lazily (using alloc) the first time a character needs escaping;
	     it is not touched when nothing needs escaping.
	   - When flags is non-NULL and componentFlag is non-zero, componentFlag is OR-ed into *flags
	     whenever a character needs escaping.
	   - useCString selects how the pending run is appended: as ISO Latin-1 bytes or as UniChars.

	 Returns true when at least one character needed escaping.

	 The run [*mark, end) that is still pending on return is not appended here; presumably the
	 caller appends it -- confirm against CFURL.c.
	 */
	{
	    CFIndex idx;
	    Boolean sawIllegalChar = false;
	    for (idx = base; idx < end; idx ++) {
	        Boolean shouldEscape;
	        UniChar ch = characterArray[idx];
	        if (isURLLegalCharacter(ch)) {
	            // '/', '?' and '@' are legal URL characters, but this code escapes them inside the user and password components.
	            if ((componentFlag == HAS_USER || componentFlag == HAS_PASSWORD) && (ch == '/' || ch == '?' || ch == '@')) {
	                shouldEscape = true;
	            } else {
	                shouldEscape = false;
	            }
	        } else if (ch == '%' && idx + 2 < end && isHexDigit(characterArray[idx + 1]) && isHexDigit(characterArray[idx+2])) {
	            // already a well-formed "%XX" escape: keep it
	            shouldEscape = false;
	        } else if (componentFlag == HAS_HOST && ((idx == base && ch == '[') || (idx == end-1 && ch == ']'))) {
	            // a leading '[' or trailing ']' in the host component is kept as-is
	            shouldEscape = false;
	        } else {
	            shouldEscape = true;
	        }
	        if (shouldEscape) {
	            Boolean didEscape;

	            sawIllegalChar = true;
	            if (componentFlag && flags) {
	                *flags |= componentFlag;
	            }
	            if (!*escapedString) {
	                *escapedString = CFStringCreateMutable(alloc, 0);
	            }
	            // Flush the pending, unescaped run [*mark, idx).
	            if (useCString) {
	                CFStringRef tempString = CFStringCreateWithBytes(alloc, (const uint8_t *)&(characterArray[*mark]), idx - *mark, kCFStringEncodingISOLatin1, false);
	                CFStringAppend(*escapedString, tempString);
	                CFRelease(tempString);
	            } else {
	                CFStringAppendCharacters(*escapedString, (const UniChar *)&(characterArray[*mark]), idx - *mark);
	            }
	            // try as a single UniChar first
	            didEscape = _appendPercentEscapesForCharacter(&ch, false, encoding, *escapedString);
	            if ( didEscape ) {
	                *mark = idx + 1;
	            }
	            // if that failed, the encoding is UTF8 and this is a surrogate pair, then percent-encode the surrogate pair
	            else if ( (encoding == kCFStringEncodingUTF8) && ((idx + 1) < end) && CFCharacterSetIsSurrogateHighCharacter(ch) && CFCharacterSetIsSurrogateLowCharacter(characterArray[idx + 1]) ) {
	                // percent-encode the surrogate pair
	                UniChar surrogatePair[2];
	                surrogatePair[0] = ch;
	                surrogatePair[1] = characterArray[idx + 1];
	                didEscape = _appendPercentEscapesForCharacter(surrogatePair, true, encoding, *escapedString);
	                if ( didEscape ) {
	                    // we consumed 2 characters instead of 1
	                    *mark = idx + 2;
	                    ++idx;
	                }
	            }
	            if ( !didEscape ) {
	                // The character could not be escaped. The run [*mark, idx) has already been appended
	                // above, so the mark must still advance to idx; otherwise that run would be appended
	                // a second time by the next flush. The unescapable character itself stays pending and
	                // is copied through verbatim with the next run.
	                *mark = idx;
	            }
	        }
	    }
	    return sawIllegalChar;
	}
	#endif // CFURL_INCLUDE_SCAN_CHARACTERS