Auto merge of #123681 - pietroalbini:pa-cve-2024-24576-stable, r=pietroalbini

[stable] Prepare Rust 1.77.2

See https://blog.rust-lang.org/2024/04/09/cve-2024-24576.html
diff --git a/RELEASES.md b/RELEASES.md
index 29fe44a..574739b 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,3 +1,10 @@
+Version 1.77.2 (2024-04-09)
+===========================
+
+<a id="1.77.2"></a>
+
+- [CVE-2024-24576: fix escaping of Windows batch file arguments in `std::process::Command`](https://blog.rust-lang.org/2024/04/09/cve-2024-24576.html)
+
 Version 1.77.1 (2024-03-28)
 ===========================
 
diff --git a/library/std/src/os/windows/process.rs b/library/std/src/os/windows/process.rs
index 5bf0154..19a593c 100644
--- a/library/std/src/os/windows/process.rs
+++ b/library/std/src/os/windows/process.rs
@@ -196,8 +196,60 @@
 
     /// Append literal text to the command line without any quoting or escaping.
     ///
-    /// This is useful for passing arguments to `cmd.exe /c`, which doesn't follow
-    /// `CommandLineToArgvW` escaping rules.
+    /// This is useful for passing arguments to applications which doesn't follow
+    /// the standard C run-time escaping rules, such as `cmd.exe /c`.
+    ///
+    /// # Bat files
+    ///
+    /// Note the `cmd /c` command line has slightly different escaping rules then bat files
+    /// themselves. If possible, it may be better to write complex arguments to a temporary
+    /// .bat file, with appropriate escaping, and simply run that using:
+    ///
+    /// ```no_run
+    /// # use std::process::Command;
+    /// # let temp_bat_file = "";
+    /// # #[allow(unused)]
+    /// let output = Command::new("cmd").args(["/c", &format!("\"{temp_bat_file}\"")]).output();
+    /// ```
+    ///
+    /// # Example
+    ///
+    /// Run a bat script using both trusted and untrusted arguments.
+    ///
+    /// ```no_run
+    /// #[cfg(windows)]
+    /// // `my_script_path` is a path to known bat file.
+    /// // `user_name` is an untrusted name given by the user.
+    /// fn run_script(
+    ///     my_script_path: &str,
+    ///     user_name: &str,
+    /// ) -> Result<std::process::Output, std::io::Error> {
+    ///     use std::io::{Error, ErrorKind};
+    ///     use std::os::windows::process::CommandExt;
+    ///     use std::process::Command;
+    ///
+    ///     // Create the command line, making sure to quote the script path.
+    ///     // This assumes the fixed arguments have been tested to work with the script we're using.
+    ///     let mut cmd_args = format!(r#""{my_script_path}" "--features=[a,b,c]""#);
+    ///
+    ///     // Make sure the user name is safe. In particular we need to be
+    ///     // cautious of ascii symbols that cmd may interpret specially.
+    ///     // Here we only allow alphanumeric characters.
+    ///     if !user_name.chars().all(|c| c.is_alphanumeric()) {
+    ///         return Err(Error::new(ErrorKind::InvalidInput, "invalid user name"));
+    ///     }
+    ///     // now we've checked the user name, let's add that too.
+    ///     cmd_args.push(' ');
+    ///     cmd_args.push_str(&format!("--user {user_name}"));
+    ///
+    ///     // call cmd.exe and return the output
+    ///     Command::new("cmd.exe")
+    ///         .arg("/c")
+    ///         // surround the entire command in an extra pair of quotes, as required by cmd.exe.
+    ///         .raw_arg(&format!("\"{cmd_args}\""))
+    ///         .output()
+    /// }
+    /// ````
     #[stable(feature = "windows_process_extensions_raw_arg", since = "1.62.0")]
     fn raw_arg<S: AsRef<OsStr>>(&mut self, text_to_append_as_is: S) -> &mut process::Command;
 
diff --git a/library/std/src/process.rs b/library/std/src/process.rs
index 4a7f5d8..b98ed05 100644
--- a/library/std/src/process.rs
+++ b/library/std/src/process.rs
@@ -88,6 +88,47 @@
 //! assert_eq!(b"test", output.stdout.as_slice());
 //! ```
 //!
+//! # Windows argument splitting
+//!
+//! On Unix systems arguments are passed to a new process as an array of strings
+//! but on Windows arguments are passed as a single commandline string and it's
+//! up to the child process to parse it into an array. Therefore the parent and
+//! child processes must agree on how the commandline string is encoded.
+//!
+//! Most programs use the standard C run-time `argv`, which in practice results
+//! in consistent argument handling. However some programs have their own way of
+//! parsing the commandline string. In these cases using [`arg`] or [`args`] may
+//! result in the child process seeing a different array of arguments then the
+//! parent process intended.
+//!
+//! Two ways of mitigating this are:
+//!
+//! * Validate untrusted input so that only a safe subset is allowed.
+//! * Use [`raw_arg`] to build a custom commandline. This bypasses the escaping
+//!   rules used by [`arg`] so should be used with due caution.
+//!
+//! `cmd.exe` and `.bat` use non-standard argument parsing and are especially
+//! vulnerable to malicious input as they may be used to run arbitrary shell
+//! commands. Untrusted arguments should be restricted as much as possible.
+//! For examples on handling this see [`raw_arg`].
+//!
+//! ### Bat file special handling
+//!
+//! On Windows, `Command` uses the Windows API function [`CreateProcessW`] to
+//! spawn new processes. An undocumented feature of this function is that,
+//! when given a `.bat` file as the application to run, it will automatically
+//! convert that into running `cmd.exe /c` with the bat file as the next argument.
+//!
+//! For historical reasons Rust currently preserves this behaviour when using
+//! [`Command::new`], and escapes the arguments according to `cmd.exe` rules.
+//! Due to the complexity of `cmd.exe` argument handling, it might not be
+//! possible to safely escape some special chars, and using them will result
+//! in an error being returned at process spawn. The set of unescapeable
+//! special chars might change between releases.
+//!
+//! Also note that running `.bat` scripts in this way may be removed in the
+//! future and so should not be relied upon.
+//!
 //! [`spawn`]: Command::spawn
 //! [`output`]: Command::output
 //!
@@ -97,6 +138,12 @@
 //!
 //! [`Write`]: io::Write
 //! [`Read`]: io::Read
+//!
+//! [`arg`]: Command::arg
+//! [`args`]: Command::args
+//! [`raw_arg`]: crate::os::windows::process::CommandExt::raw_arg
+//!
+//! [`CreateProcessW`]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessw
 
 #![stable(feature = "process", since = "1.0.0")]
 #![deny(unsafe_op_in_unsafe_fn)]
@@ -611,6 +658,22 @@
     /// escaped characters, word splitting, glob patterns, variable substitution, etc.
     /// have no effect.
     ///
+    /// <div class="warning">
+    ///
+    /// On Windows use caution with untrusted inputs. Most applications use the
+    /// standard convention for decoding arguments passed to them. These are safe to use with `arg`.
+    /// However some applications, such as `cmd.exe` and `.bat` files, use a non-standard way of decoding arguments
+    /// and are therefore vulnerable to malicious input.
+    /// In the case of `cmd.exe` this is especially important because a malicious argument can potentially run arbitrary shell commands.
+    ///
+    /// See [Windows argument splitting][windows-args] for more details
+    /// or [`raw_arg`] for manually implementing non-standard argument encoding.
+    ///
+    /// [`raw_arg`]: crate::os::windows::process::CommandExt::raw_arg
+    /// [windows-args]: crate::process#windows-argument-splitting
+    ///
+    /// </div>
+    ///
     /// # Examples
     ///
     /// Basic usage:
@@ -641,6 +704,22 @@
     /// escaped characters, word splitting, glob patterns, variable substitution, etc.
     /// have no effect.
     ///
+    /// <div class="warning">
+    ///
+    /// On Windows use caution with untrusted inputs. Most applications use the
+    /// standard convention for decoding arguments passed to them. These are safe to use with `args`.
+    /// However some applications, such as `cmd.exe` and `.bat` files, use a non-standard way of decoding arguments
+    /// and are therefore vulnerable to malicious input.
+    /// In the case of `cmd.exe` this is especially important because a malicious argument can potentially run arbitrary shell commands.
+    ///
+    /// See [Windows argument splitting][windows-args] for more details
+    /// or [`raw_arg`] for manually implementing non-standard argument encoding.
+    ///
+    /// [`raw_arg`]: crate::os::windows::process::CommandExt::raw_arg
+    /// [windows-args]: crate::process#windows-argument-splitting
+    ///
+    /// </div>
+    ///
     /// # Examples
     ///
     /// Basic usage:
diff --git a/library/std/src/sys/pal/windows/args.rs b/library/std/src/sys/pal/windows/args.rs
index fbbdbc2..48bcb89 100644
--- a/library/std/src/sys/pal/windows/args.rs
+++ b/library/std/src/sys/pal/windows/args.rs
@@ -7,7 +7,7 @@
 mod tests;
 
 use super::os::current_exe;
-use crate::ffi::OsString;
+use crate::ffi::{OsStr, OsString};
 use crate::fmt;
 use crate::io;
 use crate::num::NonZeroU16;
@@ -17,6 +17,7 @@
 use crate::sys::process::ensure_no_nuls;
 use crate::sys::{c, to_u16s};
 use crate::sys_common::wstr::WStrUnits;
+use crate::sys_common::AsInner;
 use crate::vec;
 
 use crate::iter;
@@ -262,16 +263,92 @@
     Ok(())
 }
 
+fn append_bat_arg(cmd: &mut Vec<u16>, arg: &OsStr, mut quote: bool) -> io::Result<()> {
+    ensure_no_nuls(arg)?;
+    // If an argument has 0 characters then we need to quote it to ensure
+    // that it actually gets passed through on the command line or otherwise
+    // it will be dropped entirely when parsed on the other end.
+    //
+    // We also need to quote the argument if it ends with `\` to guard against
+    // bat usage such as `"%~2"` (i.e. force quote arguments) otherwise a
+    // trailing slash will escape the closing quote.
+    if arg.is_empty() || arg.as_encoded_bytes().last() == Some(&b'\\') {
+        quote = true;
+    }
+    for cp in arg.as_inner().inner.code_points() {
+        if let Some(cp) = cp.to_char() {
+            // Rather than trying to find every ascii symbol that must be quoted,
+            // we assume that all ascii symbols must be quoted unless they're known to be good.
+            // We also quote Unicode control blocks for good measure.
+            // Note an unquoted `\` is fine so long as the argument isn't otherwise quoted.
+            static UNQUOTED: &str = r"#$*+-./:?@\_";
+            let ascii_needs_quotes =
+                cp.is_ascii() && !(cp.is_ascii_alphanumeric() || UNQUOTED.contains(cp));
+            if ascii_needs_quotes || cp.is_control() {
+                quote = true;
+            }
+        }
+    }
+
+    if quote {
+        cmd.push('"' as u16);
+    }
+    // Loop through the string, escaping `\` only if followed by `"`.
+    // And escaping `"` by doubling them.
+    let mut backslashes: usize = 0;
+    for x in arg.encode_wide() {
+        if x == '\\' as u16 {
+            backslashes += 1;
+        } else {
+            if x == '"' as u16 {
+                // Add n backslashes to total 2n before internal `"`.
+                cmd.extend((0..backslashes).map(|_| '\\' as u16));
+                // Appending an additional double-quote acts as an escape.
+                cmd.push(b'"' as u16)
+            } else if x == '%' as u16 || x == '\r' as u16 {
+                // yt-dlp hack: replaces `%` with `%%cd:~,%` to stop %VAR% being expanded as an environment variable.
+                //
+                // # Explanation
+                //
+                // cmd supports extracting a substring from a variable using the following syntax:
+                //     %variable:~start_index,end_index%
+                //
+                // In the above command `cd` is used as the variable and the start_index and end_index are left blank.
+                // `cd` is a built-in variable that dynamically expands to the current directory so it's always available.
+                // Explicitly omitting both the start and end index creates a zero-length substring.
+                //
+                // Therefore it all resolves to nothing. However, by doing this no-op we distract cmd.exe
+                // from potentially expanding %variables% in the argument.
+                cmd.extend_from_slice(&[
+                    '%' as u16, '%' as u16, 'c' as u16, 'd' as u16, ':' as u16, '~' as u16,
+                    ',' as u16,
+                ]);
+            }
+            backslashes = 0;
+        }
+        cmd.push(x);
+    }
+    if quote {
+        // Add n backslashes to total 2n before ending `"`.
+        cmd.extend((0..backslashes).map(|_| '\\' as u16));
+        cmd.push('"' as u16);
+    }
+    Ok(())
+}
+
 pub(crate) fn make_bat_command_line(
     script: &[u16],
     args: &[Arg],
     force_quotes: bool,
 ) -> io::Result<Vec<u16>> {
+    const INVALID_ARGUMENT_ERROR: io::Error =
+        io::const_io_error!(io::ErrorKind::InvalidInput, r#"batch file arguments are invalid"#);
     // Set the start of the command line to `cmd.exe /c "`
     // It is necessary to surround the command in an extra pair of quotes,
     // hence the trailing quote here. It will be closed after all arguments
     // have been added.
-    let mut cmd: Vec<u16> = "cmd.exe /d /c \"".encode_utf16().collect();
+    // Using /e:ON enables "command extensions" which is essential for the `%` hack to work.
+    let mut cmd: Vec<u16> = "cmd.exe /e:ON /v:OFF /d /c \"".encode_utf16().collect();
 
     // Push the script name surrounded by its quote pair.
     cmd.push(b'"' as u16);
@@ -291,18 +368,22 @@
     // reconstructed by the batch script by default.
     for arg in args {
         cmd.push(' ' as u16);
-        // Make sure to always quote special command prompt characters, including:
-        // * Characters `cmd /?` says require quotes.
-        // * `%` for environment variables, as in `%TMP%`.
-        // * `|<>` pipe/redirect characters.
-        const SPECIAL: &[u8] = b"\t &()[]{}^=;!'+,`~%|<>";
-        let force_quotes = match arg {
-            Arg::Regular(arg) if !force_quotes => {
-                arg.as_encoded_bytes().iter().any(|c| SPECIAL.contains(c))
+        match arg {
+            Arg::Regular(arg_os) => {
+                let arg_bytes = arg_os.as_encoded_bytes();
+                // Disallow \r and \n as they may truncate the arguments.
+                const DISALLOWED: &[u8] = b"\r\n";
+                if arg_bytes.iter().any(|c| DISALLOWED.contains(c)) {
+                    return Err(INVALID_ARGUMENT_ERROR);
+                }
+                append_bat_arg(&mut cmd, arg_os, force_quotes)?;
             }
-            _ => force_quotes,
+            _ => {
+                // Raw arguments are passed on as-is.
+                // It's the user's responsibility to properly handle arguments in this case.
+                append_arg(&mut cmd, arg, force_quotes)?;
+            }
         };
-        append_arg(&mut cmd, arg, force_quotes)?;
     }
 
     // Close the quote we left opened earlier.
diff --git a/src/ci/scripts/install-ninja.sh b/src/ci/scripts/install-ninja.sh
index 5145a03..23cbc2e 100755
--- a/src/ci/scripts/install-ninja.sh
+++ b/src/ci/scripts/install-ninja.sh
@@ -8,7 +8,7 @@
 
 if isWindows; then
     mkdir ninja
-    curl -o ninja.zip "${MIRRORS_BASE}/2017-03-15-ninja-win.zip"
+    curl -o ninja.zip "${MIRRORS_BASE}/2024-03-28-v1.11.1-ninja-win.zip"
     7z x -oninja ninja.zip
     rm ninja.zip
     ciCommandSetEnv "RUST_CONFIGURE_ARGS" "${RUST_CONFIGURE_ARGS} --enable-ninja"
diff --git a/src/tools/tidy/src/ui_tests.rs b/src/tools/tidy/src/ui_tests.rs
index 451276b..1538a1b 100644
--- a/src/tools/tidy/src/ui_tests.rs
+++ b/src/tools/tidy/src/ui_tests.rs
@@ -43,6 +43,9 @@
     "tests/ui/shell-argfiles/shell-argfiles-badquotes.args", // passing args via a file
     "tests/ui/shell-argfiles/shell-argfiles-via-argfile-shell.args", // passing args via a file
     "tests/ui/shell-argfiles/shell-argfiles-via-argfile.args", // passing args via a file
+    "tests/ui/std/windows-bat-args1.bat", // tests escaping arguments through batch files
+    "tests/ui/std/windows-bat-args2.bat", // tests escaping arguments through batch files
+    "tests/ui/std/windows-bat-args3.bat", // tests escaping arguments through batch files
 ];
 
 fn check_entries(tests_path: &Path, bad: &mut bool) {
diff --git a/src/version b/src/version
index b113158..369f996 100644
--- a/src/version
+++ b/src/version
@@ -1 +1 @@
-1.77.1
+1.77.2
diff --git a/tests/ui/std/windows-bat-args.rs b/tests/ui/std/windows-bat-args.rs
new file mode 100644
index 0000000..d2d5fe7
--- /dev/null
+++ b/tests/ui/std/windows-bat-args.rs
@@ -0,0 +1,90 @@
+// only-windows
+// run-pass
+// run-flags:--parent-process
+
+use std::env;
+use std::io::ErrorKind::{self, InvalidInput};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+fn main() {
+    if env::args().nth(1).as_deref() == Some("--parent-process") {
+        parent();
+    } else {
+        child();
+    }
+}
+
+fn child() {
+    if env::args().len() == 1 {
+        panic!("something went wrong :/");
+    }
+    for arg in env::args().skip(1) {
+        print!("{arg}\0");
+    }
+}
+
+fn parent() {
+    let mut bat = PathBuf::from(file!());
+    bat.set_file_name("windows-bat-args1.bat");
+    let bat1 = String::from(bat.to_str().unwrap());
+    bat.set_file_name("windows-bat-args2.bat");
+    let bat2 = String::from(bat.to_str().unwrap());
+    bat.set_file_name("windows-bat-args3.bat");
+    let bat3 = String::from(bat.to_str().unwrap());
+    let bat = [bat1.as_str(), bat2.as_str(), bat3.as_str()];
+
+    check_args(&bat, &["a", "b"]).unwrap();
+    check_args(&bat, &["c is for cat", "d is for dog"]).unwrap();
+    check_args(&bat, &["\"", " \""]).unwrap();
+    check_args(&bat, &["\\", "\\"]).unwrap();
+    check_args(&bat, &[">file.txt"]).unwrap();
+    check_args(&bat, &["whoami.exe"]).unwrap();
+    check_args(&bat, &["&a.exe"]).unwrap();
+    check_args(&bat, &["&echo hello "]).unwrap();
+    check_args(&bat, &["&echo hello", "&whoami", ">file.txt"]).unwrap();
+    check_args(&bat, &["!TMP!"]).unwrap();
+    check_args(&bat, &["key=value"]).unwrap();
+    check_args(&bat, &["\"key=value\""]).unwrap();
+    check_args(&bat, &["key = value"]).unwrap();
+    check_args(&bat, &["key=[\"value\"]"]).unwrap();
+    check_args(&bat, &["", "a=b"]).unwrap();
+    check_args(&bat, &["key=\"foo bar\""]).unwrap();
+    check_args(&bat, &["key=[\"my_value]"]).unwrap();
+    check_args(&bat, &["key=[\"my_value\",\"other-value\"]"]).unwrap();
+    check_args(&bat, &["key\\=value"]).unwrap();
+    check_args(&bat, &["key=\"&whoami\""]).unwrap();
+    check_args(&bat, &["key=\"value\"=5"]).unwrap();
+    check_args(&bat, &["key=[\">file.txt\"]"]).unwrap();
+    assert_eq!(check_args(&bat, &["\n"]), Err(InvalidInput));
+    assert_eq!(check_args(&bat, &["\r"]), Err(InvalidInput));
+    check_args(&bat, &["%hello"]).unwrap();
+    check_args(&bat, &["%PATH%"]).unwrap();
+    check_args(&bat, &["%%cd:~,%"]).unwrap();
+    check_args(&bat, &["%PATH%PATH%"]).unwrap();
+    check_args(&bat, &["\">file.txt"]).unwrap();
+    check_args(&bat, &["abc\"&echo hello"]).unwrap();
+    check_args(&bat, &["123\">file.txt"]).unwrap();
+    check_args(&bat, &["\"&echo hello&whoami.exe"]).unwrap();
+    check_args(&bat, &[r#"hello^"world"#, "hello &echo oh no >file.txt"]).unwrap();
+}
+
+// Check if the arguments roundtrip through a bat file and back into a Rust process.
+// Our Rust process outptuts the arguments as null terminated strings.
+#[track_caller]
+fn check_args(bats: &[&str], args: &[&str]) -> Result<(), ErrorKind> {
+    for bat in bats {
+        let output = Command::new(&bat).args(args).output().map_err(|e| e.kind())?;
+        assert!(output.status.success());
+        let child_args = String::from_utf8(output.stdout).unwrap();
+        let mut child_args: Vec<&str> =
+            child_args.strip_suffix('\0').unwrap().split('\0').collect();
+        // args3.bat can append spurious empty arguments, so trim them here.
+        child_args.truncate(
+            child_args.iter().rposition(|s| !s.is_empty()).unwrap_or(child_args.len() - 1) + 1,
+        );
+        assert_eq!(&child_args, &args);
+        assert!(!Path::new("file.txt").exists());
+    }
+    Ok(())
+}
diff --git a/tests/ui/std/windows-bat-args1.bat b/tests/ui/std/windows-bat-args1.bat
new file mode 100644
index 0000000..edd36bd
--- /dev/null
+++ b/tests/ui/std/windows-bat-args1.bat
@@ -0,0 +1 @@
+@a.exe %*
diff --git a/tests/ui/std/windows-bat-args2.bat b/tests/ui/std/windows-bat-args2.bat
new file mode 100644
index 0000000..8d5a7dd
--- /dev/null
+++ b/tests/ui/std/windows-bat-args2.bat
@@ -0,0 +1 @@
+@a.exe %1 %2 %3 %4 %5 %6 %7 %8 %9
diff --git a/tests/ui/std/windows-bat-args3.bat b/tests/ui/std/windows-bat-args3.bat
new file mode 100644
index 0000000..7fe360a
--- /dev/null
+++ b/tests/ui/std/windows-bat-args3.bat
@@ -0,0 +1 @@
+@a.exe "%~1" "%~2" "%~3" "%~4" "%~5" "%~6" "%~7" "%~8" "%~9"