| #!/bin/sh |
| |
| # wcurl - a simple wrapper around curl to easily download files. |
| # |
| # Requires curl >= 7.46.0 (2015) |
| # |
| # Copyright (C) Samuel Henrique <samueloph@debian.org>, Sergio Durigan |
| # Junior <sergiodj@debian.org> and many contributors, see the AUTHORS |
| # file. |
| # |
| # Permission to use, copy, modify, and distribute this software for any purpose |
| # with or without fee is hereby granted, provided that the above copyright |
| # notice and this permission notice appear in all copies. |
| # |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN |
| # NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
| # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE |
| # OR OTHER DEALINGS IN THE SOFTWARE. |
| # |
| # Except as contained in this notice, the name of a copyright holder shall not be |
| # used in advertising or otherwise to promote the sale, use or other dealings in |
| # this Software without prior written authorization of the copyright holder. |
| # |
| # SPDX-License-Identifier: curl |
| |
| # Stop on errors and on usage of unset variables. |
| set -eu |
| |
| VERSION="2025.11.04" |
| |
| PROGRAM_NAME="$(basename "$0")" |
| readonly PROGRAM_NAME |
| |
| # Display the version. |
| print_version() |
| { |
| cat << _EOF_ |
| ${VERSION} |
| _EOF_ |
| } |
| |
| # Display the program usage. |
| usage() |
| { |
| cat << _EOF_ |
| ${PROGRAM_NAME} -- a simple wrapper around curl to easily download files. |
| |
| Usage: ${PROGRAM_NAME} <URL>... |
| ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>... |
| ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>... |
| ${PROGRAM_NAME} -h|--help |
| ${PROGRAM_NAME} -V|--version |
| |
| Options: |
| |
| --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be |
| specified more than once. |
| |
| -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If |
| multiple URLs are provided, resulting files share the same name with a |
| number appended to the end (curl >= 7.83.0). If this option is provided |
| multiple times, only the last value is considered. |
| |
| --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in |
| the URL was done by wcurl, e.g.: The URL contained whitespace. |
| |
| --dry-run: Don't actually execute curl, just print what would be invoked. |
| |
| -V, --version: Print version information. |
| |
| -h, --help: Print this usage message. |
| |
| <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is |
| instead forwarded to the curl invocation. |
| |
| <URL>: URL to be downloaded. Anything that is not a parameter is considered |
       a URL. Whitespace is percent-encoded and the URL is passed to curl, which
| then performs the parsing. May be specified more than once. |
| _EOF_ |
| } |
| |
| # Display an error message and bail out. |
| error() |
| { |
    printf "%s\n" "$*" >&2
| exit 1 |
| } |
| |
| # Extra curl options provided by the user. |
| # This is set per-URL for every URL provided. |
# Some options are global, but we err on the side of needlessly setting them
# multiple times instead of causing issues with parameters that need to be
# set per-URL.
| CURL_OPTIONS="" |
| |
| # The URLs to be downloaded. |
| URLS="" |
| |
# Will be set to the percent-decoded filename parsed from the URL, unless
# --output or --no-decode-filename is used.
| OUTPUT_PATH="" |
| HAS_USER_SET_OUTPUT="false" |
| |
| # The parameters that are passed per-URL to curl. |
| readonly PER_URL_PARAMETERS="\ |
| --fail \ |
| --globoff \ |
| --location \ |
| --proto-default https \ |
| --remote-time \ |
| --retry 5 " |
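
# For reference, the per-URL flags above do the following (per curl's documentation):
#   --fail                 exit with an error on HTTP 4xx/5xx responses instead of saving the error page
#   --globoff              treat '[]' and '{}' literally instead of as URL globs
#   --location             follow HTTP redirects
#   --proto-default https  assume https:// when the URL has no scheme
#   --remote-time          set the local file's timestamp from the server's, when available
#   --retry 5              retry up to 5 times on transient errors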
| |
# Percent-encoded codes that are considered unsafe to decode.
# This is a space-separated list of uppercase two-digit hex codes.
| # 2F = / |
| # 5C = \ |
| readonly UNSAFE_PERCENT_ENCODE="2F 5C" |
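# For example, with the list above a URL ending in "a%2Fb.txt" keeps that name
# as-is: decoding %2F to "/" would turn the filename into a path ("a/b.txt").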
| |
| # Whether to invoke curl or not. |
| DRY_RUN="false" |
| |
| # Sanitize parameters. |
| sanitize() |
| { |
| if [ -z "${URLS}" ]; then |
| error "You must provide at least one URL to download." |
| fi |
| |
| readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT |
| } |
| |
# Indicate via exit code whether the string given in the first parameter
# consists solely of characters from the string given in the second parameter.
# In other words, it returns 0 if the first parameter only contains characters
# from the second parameter (i.e., the characters of $1 are a subset of the
# characters of $2). An empty first parameter is also rejected (returns 1).
| is_subset_of() |
| { |
| case "${1}" in |
| *[!${2}]* | '') return 1 ;; |
| esac |
| } |
| |
# Indicate via exit code whether the percent-encoded hex code given in the
# first parameter (two hex digits, without the leading "%") is safe to be decoded.
| is_safe_percent_encode() |
| { |
| upper_str=$(printf "%s" "${1}" | tr "[:lower:]" "[:upper:]") |
| for unsafe in ${UNSAFE_PERCENT_ENCODE}; do |
| if [ "${unsafe}" = "${upper_str}" ]; then |
| return 1 |
| fi |
| done |
| |
| return 0 |
| } |
| |
| # Print the given string percent-decoded. |
| percent_decode() |
| { |
| # Encodings of control characters (00-1F) are passed through without decoding. |
| # Iterate on the input character-by-character, decoding it. |
| printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do |
| # If character is a "%", read the next character as decode_hex1. |
| if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then |
| decode_out="${decode_out}${decode_hex1}" |
| # If there's one more character, read it as decode_hex2. |
| if IFS= read -r decode_hex2; then |
| decode_out="${decode_out}${decode_hex2}" |
| # Skip decoding if this is a control character (00-1F). |
| # Skip decoding if DECODE_FILENAME is not "true". |
| if [ "${DECODE_FILENAME}" = "true" ] \ |
| && is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" \ |
| && is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" \ |
                    && is_safe_percent_encode "${decode_hex1}${decode_hex2}"; then
| # Use printf to decode it into octal and then decode it to the final format. |
| decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")" |
| fi |
| fi |
| fi |
| printf %s "${decode_out}" |
| done |
| } |
| |
| # Print the percent-decoded filename portion of the given URL. |
| get_url_filename() |
| { |
| # Remove protocol and query string if present. |
| hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')" |
| # If what remains contains a slash, there's a path; return it percent-decoded. |
| case "${hostname_and_path}" in |
| # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something" |
| */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')" ;; |
| esac |
| # No slash means there was just a hostname and no path; return empty string. |
| } |
| |
| # Execute curl with the list of URLs provided by the user. |
| exec_curl() |
| { |
| CMD="curl " |
| |
| # Store version to check if it supports --no-clobber, --parallel and --parallel-max-host. |
| curl_version=$($CMD --version | cut -f2 -d' ' | head -n1) |
| curl_version_major=$(echo "$curl_version" | cut -f1 -d.) |
| curl_version_minor=$(echo "$curl_version" | cut -f2 -d.) |
| |
| CURL_NO_CLOBBER="" |
| CURL_PARALLEL="" |
| # --no-clobber is only supported since 7.83.0. |
| # --parallel is only supported since 7.66.0. |
| # --parallel-max-host is only supported since 8.16.0. |
| if [ "${curl_version_major}" -ge 8 ]; then |
| CURL_NO_CLOBBER="--no-clobber" |
| CURL_PARALLEL="--parallel" |
        if [ "${curl_version_major}" -gt 8 ] || [ "${curl_version_minor}" -ge 16 ]; then
| CURL_PARALLEL="--parallel --parallel-max-host 5" |
| fi |
| elif [ "${curl_version_major}" -eq 7 ]; then |
| if [ "${curl_version_minor}" -ge 83 ]; then |
| CURL_NO_CLOBBER="--no-clobber" |
| fi |
| if [ "${curl_version_minor}" -ge 66 ]; then |
| CURL_PARALLEL="--parallel" |
| fi |
| fi |
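
    # For illustration, the checks above yield:
    #   curl 7.46.0 -> neither --no-clobber nor --parallel
    #   curl 7.68.0 -> --parallel only
    #   curl 7.88.1 -> --no-clobber and --parallel
    #   curl 8.16.0 -> --no-clobber, --parallel and --parallel-max-host 5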
| |
    # Detect whether we need --parallel by counting the URLs; it's
    # easiest to rely on the shell's own argument parsing for that.
| # shellcheck disable=SC2086 |
| set -- $URLS |
| |
    # If there are fewer than two URLs, don't set the parallel flag.
| if [ "$#" -lt 2 ]; then |
| CURL_PARALLEL="" |
| fi |
| |
| # Start assembling the command. |
| # |
| # We use 'set --' here (again) because (a) we don't have arrays on |
| # POSIX shell, and (b) we need better control over the way we |
| # split arguments. |
| # |
| # shellcheck disable=SC2086 |
| set -- ${CMD} ${CURL_PARALLEL} |
| |
| NEXT_PARAMETER="" |
| for url in ${URLS}; do |
| # If the user did not provide an output path, define one. |
| if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then |
| OUTPUT_PATH="$(get_url_filename "${url}")" |
| # If we could not get a path from the URL, use the default: index.html. |
| [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html |
| fi |
| # shellcheck disable=SC2086 |
| set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_NO_CLOBBER} --output "${OUTPUT_PATH}" ${CURL_OPTIONS} "${url}" |
| NEXT_PARAMETER="--next" |
| done |
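
    # At this point "$@" looks roughly like (bracketed parts are optional):
    #   curl [--parallel [--parallel-max-host 5]] \
    #       --fail --globoff --location --proto-default https --remote-time --retry 5 \
    #       [--no-clobber] --output <name> [CURL_OPTIONS] <url> \
    #       [--next <same per-URL parameters> <next url> ...]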
| |
| if [ "${DRY_RUN}" = "false" ]; then |
| exec "$@" |
| else |
| printf "%s\n" "$@" |
| fi |
| } |
| |
| # Default to decoding the output filename |
| DECODE_FILENAME="true" |
| |
| # Use "${1-}" in order to avoid errors because of 'set -u'. |
| while [ -n "${1-}" ]; do |
| case "${1}" in |
| --curl-options=*) |
| opt=$(printf "%s\n" "${1}" | sed 's/^--curl-options=//') |
| CURL_OPTIONS="${CURL_OPTIONS} ${opt}" |
| ;; |
| |
| --curl-options) |
| shift |
| CURL_OPTIONS="${CURL_OPTIONS} ${1}" |
| ;; |
| |
| --dry-run) |
| DRY_RUN="true" |
| ;; |
| |
| --output=*) |
| opt=$(printf "%s\n" "${1}" | sed 's/^--output=//') |
| HAS_USER_SET_OUTPUT="true" |
| OUTPUT_PATH="${opt}" |
| ;; |
| |
| -o | -O | --output) |
| shift |
| HAS_USER_SET_OUTPUT="true" |
| OUTPUT_PATH="${1}" |
| ;; |
| |
| -o* | -O*) |
| opt=$(printf "%s\n" "${1}" | sed 's/^-[oO]//') |
| HAS_USER_SET_OUTPUT="true" |
| OUTPUT_PATH="${opt}" |
| ;; |
| |
| --no-decode-filename) |
| DECODE_FILENAME="false" |
| ;; |
| |
| -h | --help) |
| usage |
| exit 0 |
| ;; |
| |
| -V | --version) |
| print_version |
| exit 0 |
| ;; |
| |
| --) |
| # This is the start of the list of URLs. |
| shift |
| for url in "$@"; do |
                # Encode whitespace into %20 so URLs containing spaces still work
                # (wget accepts such URLs, and wcurl aims to match that convenience).
| newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g') |
| URLS="${URLS} ${newurl}" |
| done |
| break |
| ;; |
| |
| -*) |
| error "Unknown option: '$1'." |
| ;; |
| |
| *) |
| # This must be a URL. |
            # Encode whitespace into %20 so URLs containing spaces still work
            # (wget accepts such URLs, and wcurl aims to match that convenience).
| newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g') |
| URLS="${URLS} ${newurl}" |
| ;; |
| esac |
| shift |
| done |
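
# For illustration, an invocation such as
#   wcurl --curl-options=--verbose "https://example.com/a file.txt"
# leaves the loop above with CURL_OPTIONS=" --verbose" and
# URLS=" https://example.com/a%20file.txt" (leading spaces come from concatenation).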
| |
| sanitize |
| exec_curl |