// src/lib/url/gurl.h - fuchsia - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef SRC_LIB_URL_GURL_H_
 #define SRC_LIB_URL_GURL_H_

 #include <iosfwd>
 #include <string>

 #include "src/lib/fxl/strings/string_view.h"
 #include "src/lib/url/third_party/mozilla/url_parse.h"
 #include "src/lib/url/url_canon.h"
 #include "src/lib/url/url_canon_stdstring.h"
 #include "src/lib/url/url_constants.h"
 #include "src/lib/url/url_export.h"

 namespace url {

 // GURL holds the text of a URL (spec_), the locations of its components
 // within that text (parsed_), and a flag recording whether the URL is valid
 // (is_valid_). Most member functions are only declared here and are defined
 // out of line.
 class URL_EXPORT GURL {
  public:
   // Creates an empty, invalid URL.
   GURL();

   // Copy construction is relatively inexpensive, with most of the time going
   // to reallocating the string. It does not re-parse.
   GURL(const GURL& other);

   // Constructs a GURL from |url_string|, which must be UTF-8. Invalid UTF-8
   // input will result in an invalid URL.
   //
   // No wide-string overload is declared in this class. If one is added, it
   // should also take an encoding parameter so we know how to encode the query
   // parameters. It is probably sufficient for this narrow version to assume
   // the query parameter encoding should be the same as the input encoding.
   explicit GURL(const std::string& url_string /*, output_param_encoding*/);

   // Constructor for URLs that have already been parsed and canonicalized. This
   // is used for conversions from KURL, for example. The caller must supply all
   // information associated with the URL, which must be correct and consistent.
   GURL(const char* canonical_spec, size_t canonical_spec_len, const url::Parsed& parsed,
        bool is_valid);
   // Same contract as the constructor above, but notice that we take the
   // canonical_spec by value so that we can convert from WebURL without copying
   // the string. When we call this constructor we pass in a temporary
   // std::string, which lets the compiler skip the copy and just move the
   // std::string into the function argument. In the implementation, we use
   // swap to move the data into the GURL itself, which means we end up with
   // zero copies.
   GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid);

   ~GURL();

   // Assignment takes |other| by value, so the source is copied (or moved) into
   // the parameter at the call site.
   GURL& operator=(GURL other);

   // Returns true when this object represents a valid parsed URL. When not
   // valid, other functions will still succeed, but you will not get canonical
   // data out in the format you may be expecting. Instead, we keep something
   // "reasonable looking" so that the user can see how it's busted if
   // displayed to them.
   bool is_valid() const { return is_valid_; }

   // Returns true if the URL is zero-length. Note that empty URLs are also
   // invalid, and is_valid() will return false for them. This is provided
   // because some users may want to treat the empty case differently.
   bool is_empty() const { return spec_.empty(); }

   // Returns the raw spec, i.e., the full text of the URL, in canonical UTF-8,
   // if the URL is valid. If the URL is not valid, this will assert and return
   // the empty string (for safety in release builds, to keep them from being
   // misused which might be a security problem).
   //
   // The URL will be ASCII except the reference fragment, which may be UTF-8.
   // It is guaranteed to be valid UTF-8.
   //
   // The exception is for is_empty() URLs (which are !is_valid()): for those
   // this will return the empty string without asserting.
   //
   // Use possibly_invalid_spec() below to get the unusable spec of an invalid
   // URL. This separation is designed to prevent errors that may cause security
   // problems that could result from the mistaken use of an invalid URL.
   const std::string& spec() const;

   // Returns the potentially invalid spec for the URL. This spec MUST NOT be
   // modified or sent over the network. It is designed to be displayed in error
   // messages to the user, as the appearance of the spec may explain the error.
   // If the spec is valid, the valid spec will be returned.
   //
   // The returned string is guaranteed to be valid UTF-8.
   const std::string& possibly_invalid_spec() const { return spec_; }

   // Getter for the raw parsed structure. This allows callers to locate parts
   // of the URL within the spec themselves. Most callers should consider using
   // the individual component getters below.
   //
   // The returned parsed structure will reference into the raw spec, which may
   // or may not be valid. If you are using this to index into the spec, BE
   // SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you
   // don't do anything "important" with invalid specs.
   const url::Parsed& parsed_for_possibly_invalid_spec() const { return parsed_; }

   // Equality and inequality comparison. Both are defined out of line; what
   // exactly is compared is not visible from this header.
   bool operator==(const GURL& other) const;
   bool operator!=(const GURL& other) const;

   // Ordering operators. operator< allows GURL to be used as a key in STL
   // containers (for example, a std::set or std::map).
   bool operator<(const GURL& other) const;
   bool operator>(const GURL& other) const;

   // Resolves a URL that's possibly relative to this object's URL, and returns
   // it. Absolute URLs are also handled according to the rules of URLs on web
   // pages.
   //
   // It may be impossible to resolve the URLs properly. If the input is not
   // "standard" (IsStandard() == false) and the input looks relative, we can't
   // resolve it. In these cases, the result will be an empty, invalid GURL.
   //
   // The result may also be a nonempty, invalid URL if the input has some kind
   // of encoding error. In these cases, we will try to construct a "good" URL
   // that may have meaning to the user, but it will be marked invalid.
   //
   // It is an error to resolve a URL relative to an invalid URL. The result
   // will be the empty URL.
   GURL Resolve(const std::string& relative) const;

   // A helper function that is equivalent to replacing the path with a slash
   // and clearing out everything after that. We sometimes need to know just the
   // scheme and the authority. If this URL is not a standard URL (it doesn't
   // have the regular authority and path sections), then the result will be
   // an empty, invalid GURL. Note that this *does* work for file: URLs, which
   // some callers may want to filter out before calling this.
   //
   // It is an error to get an empty path on an invalid URL. The result
   // will be the empty URL.
   GURL GetWithEmptyPath() const;

   // Returns true if the scheme for the current URL is a known "standard-format"
   // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic
   // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes
   // file: and filesystem:, which some callers may want to filter out
   // explicitly. SchemeIsFile() below covers file:; no SchemeIsFileSystem()
   // helper is declared in this class.
   bool IsStandard() const;

   // Returns true if the given parameter (should be lower-case ASCII to match
   // the canonicalized scheme) is the scheme for this URL. This call is more
   // efficient than getting the scheme and comparing it because no copies or
   // object constructions are done.
   bool SchemeIs(const char* lower_ascii_scheme) const;

   // Returns true if the scheme is "http" or "https".
   bool SchemeIsHTTPOrHTTPS() const;

   // Returns true if the scheme is "ws" or "wss".
   bool SchemeIsWSOrWSS() const;

   // We often need to know if this is a file URL. File URLs are "standard", but
   // are often treated separately by some programs.
   bool SchemeIsFile() const { return SchemeIs(url::kFileScheme); }

   // Returns true if the scheme indicates a secure connection. The check is
   // currently identical to SchemeIsCryptographic() below: the scheme must be
   // url::kHttpsScheme or url::kWssScheme.
   //
   // NOTE: This function is deprecated. You probably want
   // |SchemeIsCryptographic| (if you just want to know if a scheme uses TLS for
   // network transport) or Chromium's |IsOriginSecure| for a higher-level test
   // about an origin's security. See those functions' documentation for more
   // detail.
   //
   // TODO(palmer): Audit callers and change them to |SchemeIsCryptographic| or
   // |IsOriginSecure|, as appropriate. Then remove |SchemeIsSecure|.
   // crbug.com/362214
   bool SchemeIsSecure() const { return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme); }

   // Returns true if the scheme indicates a network connection that uses TLS or
   // some other cryptographic protocol (e.g. QUIC) for security.
   //
   // This function is not a complete test of whether or not an origin's code
   // is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for a
   // higher-level and more complete semantics. See that function's documentation
   // for more detail.
   bool SchemeIsCryptographic() const {
     return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
   }

   // Returns true if the scheme is "blob".
   bool SchemeIsBlob() const { return SchemeIs(url::kBlobScheme); }

   // The "content" of the URL is everything after the scheme (skipping the
   // scheme delimiting colon). It is an error to get the content of an invalid
   // URL: the result will be an empty string.
   std::string GetContent() const;

   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
   bool HostIsIPAddress() const;

   // Getters for various components of the URL. Each returns a copy of the
   // component's text taken from spec_ (see ComponentString()). The returned
   // string will be empty if the component is empty or is not present.
   std::string scheme() const {  // Not including the colon. See also SchemeIs.
     return ComponentString(parsed_.scheme);
   }
   std::string username() const { return ComponentString(parsed_.username); }
   std::string password() const { return ComponentString(parsed_.password); }
   // Note that this may be a hostname, an IPv4 address, or an IPv6 literal
   // surrounded by square brackets, like "[2001:db8::1]".  To exclude these
   // brackets, use HostNoBrackets() below.
   std::string host() const { return ComponentString(parsed_.host); }
   std::string port() const {  // Port text; empty when absent. See IntPort().
     return ComponentString(parsed_.port);
   }
   std::string path() const {  // Including first slash following host
     return ComponentString(parsed_.path);
   }
   std::string query() const {  // Stuff following '?'
     return ComponentString(parsed_.query);
   }
   std::string ref() const {  // Stuff following '#'
     return ComponentString(parsed_.ref);
   }

   // Existence querying. These functions are intended to return true if the
   // corresponding URL component exists in this URL. Note that existence is
   // different than being nonempty: http://www.google.com/? has a query that
   // just happens to be empty, and has_query() is documented to return true
   // for it.
   //
   // NOTE(review): every accessor below is implemented with
   // Component::is_nonempty(). If that predicate is false for a component that
   // is present but empty, has_query() returns false for the example above and
   // this comment and the code disagree -- confirm against url::Component.
   bool has_scheme() const { return parsed_.scheme.is_nonempty(); }
   bool has_username() const { return parsed_.username.is_nonempty(); }
   bool has_password() const { return parsed_.password.is_nonempty(); }
   bool has_host() const {
     // Note that hosts are special, absence of host means length 0.
     return parsed_.host.is_nonempty();
   }
   bool has_port() const { return parsed_.port.is_nonempty(); }
   bool has_path() const {
     // Note that "http://www.google.com/" has a path, the path is "/". This can
     // return false only for invalid or nonstandard URLs.
     return parsed_.path.is_nonempty();
   }
   bool has_query() const { return parsed_.query.is_nonempty(); }
   bool has_ref() const { return parsed_.ref.is_nonempty(); }

   // Returns a parsed version of the port. Can also be any of the special
   // values defined in Parsed for ExtractPort.
   int IntPort() const;

   // Returns the port number of the URL, or the default port number.
   // If the scheme has no concept of port (or unknown default) returns
   // PORT_UNSPECIFIED.
   int EffectiveIntPort() const;

   // Extracts the filename portion of the path and returns it. The filename
   // is everything after the last slash in the path. This may be empty.
   std::string ExtractFileName() const;

   // Returns the path that should be sent to the server. This is the path,
   // parameter, and query portions of the URL. It is guaranteed to be ASCII.
   std::string PathForRequest() const;

   // Returns the host, excluding the square brackets surrounding IPv6 address
   // literals. This can be useful for passing to getaddrinfo().
   std::string HostNoBrackets() const;

   // Returns true if this URL's host matches or is in the same domain as
   // the given input string. For example, if the hostname of the URL is
   // "www.google.com", this will return true for "com", "google.com", and
   // "www.google.com".
   //
   // The input domain should be lower-case ASCII to match the canonicalized
   // host. This call is more efficient than getting the host and checking
   // whether the host has the specific domain or not because no copies or
   // object constructions are done.
   bool DomainIs(fxl::StringView lower_ascii_domain) const;

   // Swaps the contents of this GURL object with |other|, without doing
   // any memory allocations.
   void Swap(GURL* other);

   // Returns a reference to a singleton empty GURL. This object is for callers
   // who return references but don't have anything to return in some cases.
   // This function may be called from any thread.
   static const GURL& EmptyGURL();

  private:
   // Variant of the string parsing constructor that allows the caller to elect
   // to retain trailing whitespace, if any, on the passed URL spec, but only if
   // the scheme is one that allows trailing whitespace. The primary use-case is
   // for data: URLs. In most cases, you want to use the single parameter
   // constructor above.
   //
   // The enumerator is spelled "WHITEPACE" (sic); the spelling is left alone
   // because code outside this header may refer to it by name.
   enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
   GURL(const std::string& url_string, RetainWhiteSpaceSelector);

   // Initialization helpers, defined out of line. NOTE(review): presumably
   // |trim_path_end| is the switch the whitespace-retaining constructor above
   // controls -- confirm in the implementation.
   void InitCanonical(fxl::StringView input_spec, bool trim_path_end);

   void InitializeFromCanonicalSpec();

   // Returns a copy of the part of spec_ identified by |comp|: comp.len()
   // characters starting at comp.begin. Returns an empty string when
   // comp.is_invalid_or_empty().
   std::string ComponentString(const url::Component& comp) const {
     if (comp.is_invalid_or_empty())
       return std::string();
     return std::string(spec_, comp.begin, comp.len());
   }

   // The actual text of the URL, in canonical form. See spec() for the
   // encoding guarantees.
   std::string spec_;

   // Set when the given URL is valid. Otherwise, we may still have a spec and
   // components, but they may not identify valid resources (for example, an
   // invalid port number, invalid characters in the scheme, etc.).
   // There is no in-class initializer, so every constructor must set this.
   bool is_valid_;

   // Identified components of the canonical spec.
   url::Parsed parsed_;

   // TODO bug 684583: Add encoding for query params.
 };

 // Stream insertion operator so a GURL can be used in assertion statements
 // and other code that streams its operands. Only declared here; what is
 // written for |url| is determined by the out-of-line definition.
 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);

 }  // namespace url

 #endif  // SRC_LIB_URL_GURL_H_
	// Copyright 2013 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef SRC_LIB_URL_GURL_H_
	#define SRC_LIB_URL_GURL_H_

	#include <iosfwd>
	#include <string>

	#include "src/lib/fxl/strings/string_view.h"
	#include "src/lib/url/third_party/mozilla/url_parse.h"
	#include "src/lib/url/url_canon.h"
	#include "src/lib/url/url_canon_stdstring.h"
	#include "src/lib/url/url_constants.h"
	#include "src/lib/url/url_export.h"

	namespace url {

	class URL_EXPORT GURL {
	public:
	// Creates an empty, invalid URL.
	GURL();

	// Copy construction is relatively inexpensive, with most of the time going
	// to reallocating the string. It does not re-parse.
	GURL(const GURL& other);

	// The narrow version requires the input be UTF-8. Invalid UTF-8 input will
	// result in an invalid URL.
	//
	// The wide version should also take an encoding parameter so we know how to
	// encode the query parameters. It is probably sufficient for the narrow
	// version to assume the query parameter encoding should be the same as the
	// input encoding.
	explicit GURL(const std::string& url_string /, output_param_encoding/);

	// Constructor for URLs that have already been parsed and canonicalized. This
	// is used for conversions from KURL, for example. The caller must supply all
	// information associated with the URL, which must be correct and consistent.
	GURL(const char* canonical_spec, size_t canonical_spec_len, const url::Parsed& parsed,
	bool is_valid);
	// Notice that we take the canonical_spec by value so that we can convert
	// from WebURL without copying the string. When we call this constructor
	// we pass in a temporary std::string, which lets the compiler skip the
	// copy and just move the std::string into the function argument. In the
	// implementation, we use swap to move the data into the GURL itself,
	// which means we end up with zero copies.
	GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid);

	~GURL();

	GURL& operator=(GURL other);

	// Returns true when this object represents a valid parsed URL. When not
	// valid, other functions will still succeed, but you will not get canonical
	// data out in the format you may be expecting. Instead, we keep something
	// "reasonable looking" so that the user can see how it's busted if
	// displayed to them.
	bool is_valid() const { return is_valid_; }

	// Returns true if the URL is zero-length. Note that empty URLs are also
	// invalid, and is_valid() will return false for them. This is provided
	// because some users may want to treat the empty case differently.
	bool is_empty() const { return spec_.empty(); }

	// Returns the raw spec, i.e., the full text of the URL, in canonical UTF-8,
	// if the URL is valid. If the URL is not valid, this will assert and return
	// the empty string (for safety in release builds, to keep them from being
	// misused which might be a security problem).
	//
	// The URL will be ASCII except the reference fragment, which may be UTF-8.
	// It is guaranteed to be valid UTF-8.
	//
	// The exception is for empty() URLs (which are !is_valid()) but this will
	// return the empty string without asserting.
	//
	// Used invalid_spec() below to get the unusable spec of an invalid URL. This
	// separation is designed to prevent errors that may cause security problems
	// that could result from the mistaken use of an invalid URL.
	const std::string& spec() const;

	// Returns the potentially invalid spec for a the URL. This spec MUST NOT be
	// modified or sent over the network. It is designed to be displayed in error
	// messages to the user, as the appearance of the spec may explain the error.
	// If the spec is valid, the valid spec will be returned.
	//
	// The returned string is guaranteed to be valid UTF-8.
	const std::string& possibly_invalid_spec() const { return spec_; }

	// Getter for the raw parsed structure. This allows callers to locate parts
	// of the URL within the spec themselves. Most callers should consider using
	// the individual component getters below.
	//
	// The returned parsed structure will reference into the raw spec, which may
	// or may not be valid. If you are using this to index into the spec, BE
	// SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you
	// don't do anything "important" with invalid specs.
	const url::Parsed& parsed_for_possibly_invalid_spec() const { return parsed_; }

	// Defiant equality operator!
	bool operator==(const GURL& other) const;
	bool operator!=(const GURL& other) const;

	// Allows GURL to used as a key in STL (for example, a std::set or std::map).
	bool operator<(const GURL& other) const;
	bool operator>(const GURL& other) const;

	// Resolves a URL that's possibly relative to this object's URL, and returns
	// it. Absolute URLs are also handled according to the rules of URLs on web
	// pages.
	//
	// It may be impossible to resolve the URLs properly. If the input is not
	// "standard" (IsStandard() == false) and the input looks relative, we can't
	// resolve it. In these cases, the result will be an empty, invalid GURL.
	//
	// The result may also be a nonempty, invalid URL if the input has some kind
	// of encoding error. In these cases, we will try to construct a "good" URL
	// that may have meaning to the user, but it will be marked invalid.
	//
	// It is an error to resolve a URL relative to an invalid URL. The result
	// will be the empty URL.
	GURL Resolve(const std::string& relative) const;

	// A helper function that is equivalent to replacing the path with a slash
	// and clearing out everything after that. We sometimes need to know just the
	// scheme and the authority. If this URL is not a standard URL (it doesn't
	// have the regular authority and path sections), then the result will be
	// an empty, invalid GURL. Note that this does work for file: URLs, which
	// some callers may want to filter out before calling this.
	//
	// It is an error to get an empty path on an invalid URL. The result
	// will be the empty URL.
	GURL GetWithEmptyPath() const;

	// Returns true if the scheme for the current URL is a known "standard-format"
	// scheme. A standard-format scheme adheres to what RFC 3986 calls "generic
	// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes
	// file: and filesystem:, which some callers may want to filter out explicitly
	// by calling SchemeIsFile[System].
	bool IsStandard() const;

	// Returns true if the given parameter (should be lower-case ASCII to match
	// the canonicalized scheme) is the scheme for this URL. This call is more
	// efficient than getting the scheme and comparing it because no copies or
	// object constructions are done.
	bool SchemeIs(const char* lower_ascii_scheme) const;

	// Returns true if the scheme is "http" or "https".
	bool SchemeIsHTTPOrHTTPS() const;

	// Returns true is the scheme is "ws" or "wss".
	bool SchemeIsWSOrWSS() const;

	// We often need to know if this is a file URL. File URLs are "standard", but
	// are often treated separately by some programs.
	bool SchemeIsFile() const { return SchemeIs(url::kFileScheme); }

	// Returns true if the scheme indicates a secure connection.
	//
	// NOTE: This function is deprecated. You probably want
	// \|SchemeIsCryptographic\| (if you just want to know if a scheme uses TLS for
	// network transport) or Chromium's \|IsOriginSecure\| for a higher-level test
	// about an origin's security. See those functions' documentation for more
	// detail.
	//
	// TODO(palmer): Audit callers and change them to \|SchemeIsCryptographic\| or
	// \|IsOriginSecure\|, as appropriate. Then remove \|SchemeIsSecure\|.
	// crbug.com/362214
	bool SchemeIsSecure() const { return SchemeIs(url::kHttpsScheme) \|\| SchemeIs(url::kWssScheme); }

	// Returns true if the scheme indicates a network connection that uses TLS or
	// some other cryptographic protocol (e.g. QUIC) for security.
	//
	// This function is a not a complete test of whether or not an origin's code
	// is minimally trustworthy. For that, see Chromium's \|IsOriginSecure\| for a
	// higher-level and more complete semantics. See that function's documentation
	// for more detail.
	bool SchemeIsCryptographic() const {
	return SchemeIs(url::kHttpsScheme) \|\| SchemeIs(url::kWssScheme);
	}

	// Returns true if the scheme is "blob".
	bool SchemeIsBlob() const { return SchemeIs(url::kBlobScheme); }

	// The "content" of the URL is everything after the scheme (skipping the
	// scheme delimiting colon). It is an error to get the content of an invalid
	// URL: the result will be an empty string.
	std::string GetContent() const;

	// Returns true if the hostname is an IP address. Note: this function isn't
	// as cheap as a simple getter because it re-parses the hostname to verify.
	bool HostIsIPAddress() const;

	// Getters for various components of the URL. The returned string will be
	// empty if the component is empty or is not present.
	std::string scheme() const { // Not including the colon. See also SchemeIs.
	return ComponentString(parsed_.scheme);
	}
	std::string username() const { return ComponentString(parsed_.username); }
	std::string password() const { return ComponentString(parsed_.password); }
	// Note that this may be a hostname, an IPv4 address, or an IPv6 literal
	// surrounded by square brackets, like "[2001:db8::1]". To exclude these
	// brackets, use HostNoBrackets() below.
	std::string host() const { return ComponentString(parsed_.host); }
	std::string port() const { // Returns -1 if "default"
	return ComponentString(parsed_.port);
	}
	std::string path() const { // Including first slash following host
	return ComponentString(parsed_.path);
	}
	std::string query() const { // Stuff following '?'
	return ComponentString(parsed_.query);
	}
	std::string ref() const { // Stuff following '#'
	return ComponentString(parsed_.ref);
	}

	// Existence querying. These functions will return true if the corresponding
	// URL component exists in this URL. Note that existence is different than
	// being nonempty. http://www.google.com/? has a query that just happens to
	// be empty, and has_query() will return true.
	bool has_scheme() const { return parsed_.scheme.is_nonempty(); }
	bool has_username() const { return parsed_.username.is_nonempty(); }
	bool has_password() const { return parsed_.password.is_nonempty(); }
	bool has_host() const {
	// Note that hosts are special, absence of host means length 0.
	return parsed_.host.is_nonempty();
	}
	bool has_port() const { return parsed_.port.is_nonempty(); }
	bool has_path() const {
	// Note that http://www.google.com/" has a path, the path is "/". This can
	// return false only for invalid or nonstandard URLs.
	return parsed_.path.is_nonempty();
	}
	bool has_query() const { return parsed_.query.is_nonempty(); }
	bool has_ref() const { return parsed_.ref.is_nonempty(); }

	// Returns a parsed version of the port. Can also be any of the special
	// values defined in Parsed for ExtractPort.
	int IntPort() const;

	// Returns the port number of the URL, or the default port number.
	// If the scheme has no concept of port (or unknown default) returns
	// PORT_UNSPECIFIED.
	int EffectiveIntPort() const;

	// Extracts the filename portion of the path and returns it. The filename
	// is everything after the last slash in the path. This may be empty.
	std::string ExtractFileName() const;

	// Returns the path that should be sent to the server. This is the path,
	// parameter, and query portions of the URL. It is guaranteed to be ASCII.
	std::string PathForRequest() const;

	// Returns the host, excluding the square brackets surrounding IPv6 address
	// literals. This can be useful for passing to getaddrinfo().
	std::string HostNoBrackets() const;

	// Returns true if this URL's host matches or is in the same domain as
	// the given input string. For example, if the hostname of the URL is
	// "www.google.com", this will return true for "com", "google.com", and
	// "www.google.com".
	//
	// The input domain should be lower-case ASCII to match the canonicalized
	// scheme. This call is more efficient than getting the host and check
	// whether host has the specific domain or not because no copies or
	// object constructions are done.
	bool DomainIs(fxl::StringView lower_ascii_domain) const;

	// Swaps the contents of this GURL object with \|other\|, without doing
	// any memory allocations.
	void Swap(GURL* other);

	// Returns a reference to a singleton empty GURL. This object is for callers
	// who return references but don't have anything to return in some cases.
	// This function may be called from any thread.
	static const GURL& EmptyGURL();

	private:
	// Variant of the string parsing constructor that allows the caller to elect
	// retain trailing whitespace, if any, on the passed URL spec, but only if
	// the scheme is one that allows trailing whitespace. The primary use-case is
	// for data: URLs. In most cases, you want to use the single parameter
	// constructor above.
	enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
	GURL(const std::string& url_string, RetainWhiteSpaceSelector);

	void InitCanonical(fxl::StringView input_spec, bool trim_path_end);

	void InitializeFromCanonicalSpec();

	// Returns the substring of the input identified by the given component.
	std::string ComponentString(const url::Component& comp) const {
	if (comp.is_invalid_or_empty())
	return std::string();
	return std::string(spec_, comp.begin, comp.len());
	}

	// The actual text of the URL, in canonical ASCII form.
	std::string spec_;

	// Set when the given URL is valid. Otherwise, we may still have a spec and
	// components, but they may not identify valid resources (for example, an
	// invalid port number, invalid characters in the scheme, etc.).
	bool is_valid_;

	// Identified components of the canonical spec.
	url::Parsed parsed_;

	// TODO bug 684583: Add encoding for query params.
	};

	// Stream insertion operator so a GURL can be used in assertion statements
	// and other code that streams its operands. Only declared here; what is
	// written for |url| is determined by the out-of-line definition.
	URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);

	} // namespace url

	#endif // SRC_LIB_URL_GURL_H_