| // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
| // file at the top-level directory of this distribution and at |
| // http://rust-lang.org/COPYRIGHT. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| // |
| // ignore-lexer-test FIXME #15679 |
| |
| //! Base64 binary-to-text encoding |
| |
| pub use self::FromBase64Error::*; |
| pub use self::CharacterSet::*; |
| |
| use std::fmt; |
| use std::error; |
| |
/// Available encoding character sets.
///
/// The variant selects which alphabet `to_base64` uses for its output.
/// Values can be compared with `==`, which makes it possible to inspect
/// or assert on a `Config` field.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CharacterSet {
    /// The standard character set (uses `+` and `/`)
    Standard,
    /// The URL safe character set (uses `-` and `_`)
    UrlSafe,
}
| |
/// Available newline types.
///
/// Selects the line terminator that `to_base64` inserts when line wrapping
/// is enabled. Values can be compared with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Newline {
    /// A linefeed (i.e. Unix-style newline)
    LF,
    /// A carriage return and a linefeed (i.e. Windows-style newline)
    CRLF,
}
| |
| /// Contains configuration parameters for `to_base64`. |
| #[derive(Clone, Copy, Debug)] |
| pub struct Config { |
| /// Character set to use |
| pub char_set: CharacterSet, |
| /// Newline to use |
| pub newline: Newline, |
| /// True to pad output with `=` characters |
| pub pad: bool, |
| /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping |
| pub line_length: Option<usize> |
| } |
| |
| /// Configuration for RFC 4648 standard base64 encoding |
| pub static STANDARD: Config = |
| Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None}; |
| |
| /// Configuration for RFC 4648 base64url encoding |
| pub static URL_SAFE: Config = |
| Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None}; |
| |
| /// Configuration for RFC 2045 MIME base64 encoding |
| pub static MIME: Config = |
| Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)}; |
| |
/// Encoding alphabet for `Standard`: the byte at index `i` is the character
/// emitted for the 6-bit value `i`.
static STANDARD_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encoding alphabet for `UrlSafe`: identical to the standard alphabet
/// except that values 62 and 63 map to `-` and `_`.
static URLSAFE_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
| |
| /// A trait for converting a value to base64 encoding. |
| pub trait ToBase64 { |
| /// Converts the value of `self` to a base64 value following the specified |
| /// format configuration, returning the owned string. |
| fn to_base64(&self, config: Config) -> String; |
| } |
| |
| impl ToBase64 for [u8] { |
| /// Turn a vector of `u8` bytes into a base64 string. |
| /// |
| /// # Example |
| /// |
| /// ```rust |
| /// extern crate rustc_serialize; |
| /// use rustc_serialize::base64::{ToBase64, STANDARD}; |
| /// |
| /// fn main () { |
| /// let str = [52,32].to_base64(STANDARD); |
| /// println!("base 64 output: {:?}", str); |
| /// } |
| /// ``` |
| fn to_base64(&self, config: Config) -> String { |
| let bytes = match config.char_set { |
| Standard => STANDARD_CHARS, |
| UrlSafe => URLSAFE_CHARS |
| }; |
| |
| let len = self.len(); |
| let newline = match config.newline { |
| Newline::LF => "\n", |
| Newline::CRLF => "\r\n", |
| }; |
| |
| // Preallocate memory. |
| let mut prealloc_len = (len + 2) / 3 * 4; |
| if let Some(line_length) = config.line_length { |
| let num_lines = match prealloc_len { |
| 0 => 0, |
| n => (n - 1) / line_length |
| }; |
| prealloc_len += num_lines * newline.bytes().count(); |
| } |
| |
| let mut out_bytes = vec![b'='; prealloc_len]; |
| |
| // Deal with padding bytes |
| let mod_len = len % 3; |
| |
| // Use iterators to reduce branching |
| { |
| let mut cur_length = 0; |
| |
| let mut s_in = self[..len - mod_len].iter().map(|&x| x as u32); |
| let mut s_out = out_bytes.iter_mut(); |
| |
| // Convenient shorthand |
| let enc = |val| bytes[val as usize]; |
| let mut write = |val| *s_out.next().unwrap() = val; |
| |
| // Iterate though blocks of 4 |
| while let (Some(first), Some(second), Some(third)) = |
| (s_in.next(), s_in.next(), s_in.next()) { |
| |
| // Line break if needed |
| if let Some(line_length) = config.line_length { |
| if cur_length >= line_length { |
| for b in newline.bytes() { write(b) }; |
| cur_length = 0; |
| } |
| } |
| |
| let n = first << 16 | second << 8 | third; |
| |
| // This 24-bit number gets separated into four 6-bit numbers. |
| write(enc((n >> 18) & 63)); |
| write(enc((n >> 12) & 63)); |
| write(enc((n >> 6 ) & 63)); |
| write(enc((n >> 0 ) & 63)); |
| |
| cur_length += 4; |
| } |
| |
| // Line break only needed if padding is required |
| if mod_len != 0 { |
| if let Some(line_length) = config.line_length { |
| if cur_length >= line_length { |
| for b in newline.bytes() { write(b) }; |
| } |
| } |
| } |
| |
| // Heh, would be cool if we knew this was exhaustive |
| // (the dream of bounded integer types) |
| match mod_len { |
| 0 => (), |
| 1 => { |
| let n = (self[len-1] as u32) << 16; |
| write(enc((n >> 18) & 63)); |
| write(enc((n >> 12) & 63)); |
| } |
| 2 => { |
| let n = (self[len-2] as u32) << 16 | |
| (self[len-1] as u32) << 8; |
| write(enc((n >> 18) & 63)); |
| write(enc((n >> 12) & 63)); |
| write(enc((n >> 6 ) & 63)); |
| } |
| _ => panic!("Algebra is broken, please alert the math police") |
| } |
| } |
| |
| // We get padding for "free", so only have to drop it if unwanted. |
| if !config.pad { |
| while let Some(&b'=') = out_bytes.last() { |
| out_bytes.pop(); |
| } |
| } |
| |
| unsafe { String::from_utf8_unchecked(out_bytes) } |
| } |
| } |
| |
| impl<'a, T: ?Sized + ToBase64> ToBase64 for &'a T { |
| fn to_base64(&self, config: Config) -> String { |
| (**self).to_base64(config) |
| } |
| } |
| |
| /// A trait for converting from base64 encoded values. |
| pub trait FromBase64 { |
| /// Converts the value of `self`, interpreted as base64 encoded data, into |
| /// an owned vector of bytes, returning the vector. |
| fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error>; |
| } |
| |
/// Errors that can occur when decoding a base64 encoded string.
///
/// Values can be compared with `==`, so callers can check for a specific
/// failure without matching on the variant by hand.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum FromBase64Error {
    /// The input contained a character not part of the base64 format.
    /// Carries the offending byte and its position in the input.
    InvalidBase64Byte(u8, usize),
    /// The input had an invalid length
    InvalidBase64Length,
}
| |
| impl fmt::Debug for FromBase64Error { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| match *self { |
| InvalidBase64Byte(ch, idx) => |
| write!(f, "Invalid character '{}' at position {}", ch, idx), |
| InvalidBase64Length => write!(f, "Invalid length"), |
| } |
| } |
| } |
| |
| impl error::Error for FromBase64Error { |
| fn description(&self) -> &str { |
| match *self { |
| InvalidBase64Byte(_, _) => "invalid character", |
| InvalidBase64Length => "invalid length", |
| } |
| } |
| } |
| |
| impl fmt::Display for FromBase64Error { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| fmt::Debug::fmt(&self, f) |
| } |
| } |
| |
| impl FromBase64 for str { |
| /// Convert any base64 encoded string (literal, `@`, `&`, or `~`) |
| /// to the byte values it encodes. |
| /// |
| /// You can use the `String::from_utf8` function to turn a `Vec<u8>` into a |
| /// string with characters corresponding to those values. |
| /// |
| /// # Example |
| /// |
| /// This converts a string literal to base64 and back. |
| /// |
| /// ```rust |
| /// extern crate rustc_serialize; |
| /// use rustc_serialize::base64::{ToBase64, FromBase64, STANDARD}; |
| /// |
| /// fn main () { |
| /// let hello_str = b"Hello, World".to_base64(STANDARD); |
| /// println!("base64 output: {}", hello_str); |
| /// let res = hello_str.from_base64(); |
| /// if res.is_ok() { |
| /// let opt_bytes = String::from_utf8(res.unwrap()); |
| /// if opt_bytes.is_ok() { |
| /// println!("decoded from base64: {:?}", opt_bytes.unwrap()); |
| /// } |
| /// } |
| /// } |
| /// ``` |
| #[inline] |
| fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { |
| self.as_bytes().from_base64() |
| } |
| } |
| |
| impl FromBase64 for [u8] { |
| fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { |
| let mut r = Vec::with_capacity(self.len()); |
| let mut buf: u32 = 0; |
| let mut modulus = 0; |
| |
| let mut it = self.iter(); |
| for byte in it.by_ref() { |
| let code = DECODE_TABLE[*byte as usize]; |
| if code >= SPECIAL_CODES_START { |
| match code { |
| NEWLINE_CODE => continue, |
| EQUALS_CODE => break, |
| INVALID_CODE => return Err(InvalidBase64Byte( |
| *byte, (byte as *const _ as usize) - self.as_ptr() as usize)), |
| _ => unreachable!(), |
| } |
| } |
| buf = (buf | code as u32) << 6; |
| modulus += 1; |
| if modulus == 4 { |
| modulus = 0; |
| r.push((buf >> 22) as u8); |
| r.push((buf >> 14) as u8); |
| r.push((buf >> 6 ) as u8); |
| } |
| } |
| |
| for byte in it { |
| match *byte { |
| b'=' | b'\r' | b'\n' => continue, |
| _ => return Err(InvalidBase64Byte( |
| *byte, (byte as *const _ as usize) - self.as_ptr() as usize)), |
| } |
| } |
| |
| match modulus { |
| 2 => { |
| r.push((buf >> 10) as u8); |
| } |
| 3 => { |
| r.push((buf >> 16) as u8); |
| r.push((buf >> 8 ) as u8); |
| } |
| 0 => (), |
| _ => return Err(InvalidBase64Length), |
| } |
| |
| Ok(r) |
| } |
| } |
| |
| impl<'a, T: ?Sized + FromBase64> FromBase64 for &'a T { |
| fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { |
| (**self).from_base64() |
| } |
| } |
| |
/// Base64 decoding lookup table, indexed by input byte.
///
/// Each entry is either the 6-bit value of the character (`0x00` to `0x3F`,
/// with both the standard and the URL safe characters for 62 and 63 mapped),
/// or one of the marker values `0xFD` (`\r` or `\n`), `0xFE` (`=`) and
/// `0xFF` (any other byte).
///
/// The table can be regenerated with:
///
/// ```
/// for i in 0..256 {
///     let ch = i as u8;
///     let code = if ch >= b'A' && ch <= b'Z' {
///         ch - 0x41
///     } else if ch >= b'a' && ch <= b'z' {
///         ch - 0x47
///     } else if ch >= b'0' && ch <= b'9' {
///         ch + 0x04
///     } else if ch == b'+' || ch == b'-' {
///         0x3E
///     } else if ch == b'/' || ch == b'_' {
///         0x3F
///     } else if ch == b'=' {
///         0xFE
///     } else if ch == b'\r' || ch == b'\n' {
///         0xFD
///     } else {
///         0xFF
///     };
///     print!("0x{:02X}, ", code);
///     if ch % 16 == 15 { println!(""); }
/// }
/// ```
const DECODE_TABLE: [u8; 256] = [
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0x3E, 0xFF, 0x3F,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF,
    0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F,
    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
];
/// Table marker for `\r` and `\n`, which the decoder skips.
const NEWLINE_CODE: u8 = 0xFD;
/// Table marker for the padding character `=`.
const EQUALS_CODE: u8 = 0xFE;
/// Table marker for bytes that are not valid in base64 input.
const INVALID_CODE: u8 = 0xFF;
/// Smallest marker value; any table entry at or above it is a marker rather
/// than a 6-bit value.
const SPECIAL_CODES_START: u8 = NEWLINE_CODE;
| |
#[cfg(test)]
mod tests {
    use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};

    // Encoding: RFC 4648 test vectors with the standard configuration.
    #[test]
    fn test_to_base64_basic() {
        let vectors = [
            ("", ""),
            ("f", "Zg=="),
            ("fo", "Zm8="),
            ("foo", "Zm9v"),
            ("foob", "Zm9vYg=="),
            ("fooba", "Zm9vYmE="),
            ("foobar", "Zm9vYmFy"),
        ];
        for &(plain, encoded) in vectors.iter() {
            assert_eq!(plain.as_bytes().to_base64(STANDARD), encoded);
        }
    }

    // Encoding: CRLF appears only when wrapping is enabled.
    #[test]
    fn test_to_base64_crlf_line_break() {
        let unwrapped = [0; 1000].to_base64(Config {line_length: None, ..STANDARD});
        assert!(!unwrapped.contains("\r\n"));

        let wrapped = b"foobar".to_base64(Config {line_length: Some(4), ..STANDARD});
        assert_eq!(wrapped, "Zm9v\r\nYmFy");
    }

    // Encoding: LF appears only when wrapping is enabled.
    #[test]
    fn test_to_base64_lf_line_break() {
        let unwrapped = [0; 1000].to_base64(Config {line_length: None,
                                                    newline: Newline::LF,
                                                    ..STANDARD});
        assert!(!unwrapped.contains("\n"));

        let wrapped = b"foobar".to_base64(Config {line_length: Some(4),
                                                  newline: Newline::LF,
                                                  ..STANDARD});
        assert_eq!(wrapped, "Zm9v\nYmFy");
    }

    // Encoding: `pad: false` drops the trailing `=` characters.
    #[test]
    fn test_to_base64_padding() {
        let unpadded = Config {pad: false, ..STANDARD};
        for &(plain, encoded) in [("f", "Zg"), ("fo", "Zm8")].iter() {
            assert_eq!(plain.as_bytes().to_base64(unpadded), encoded);
        }
    }

    // Encoding: the two alphabets differ for the values 62 and 63.
    #[test]
    fn test_to_base64_url_safe() {
        assert_eq!([251, 255].to_base64(URL_SAFE), "-_8");
        assert_eq!([251, 255].to_base64(STANDARD), "+/8=");
    }

    // Encoding: empty input with wrapping enabled must not panic.
    #[test]
    fn test_to_base64_empty_line_length() {
        [].to_base64(Config {line_length: Some(72), ..STANDARD});
    }

    // Decoding: RFC 4648 test vectors.
    #[test]
    fn test_from_base64_basic() {
        let vectors = [
            ("", ""),
            ("Zg==", "f"),
            ("Zm8=", "fo"),
            ("Zm9v", "foo"),
            ("Zm9vYg==", "foob"),
            ("Zm9vYmE=", "fooba"),
            ("Zm9vYmFy", "foobar"),
        ];
        for &(encoded, plain) in vectors.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    // Decoding: byte strings are accepted as well as `str`.
    #[test]
    fn test_from_base64_bytes() {
        assert_eq!(b"Zm9vYmFy".from_base64().unwrap(), b"foobar");
    }

    // Decoding: CRLF and LF are ignored inside and after the data.
    #[test]
    fn test_from_base64_newlines() {
        let vectors = [
            ("Zm9v\r\nYmFy", "foobar"),
            ("Zm9vYg==\r\n", "foob"),
            ("Zm9v\nYmFy", "foobar"),
            ("Zm9vYg==\n", "foob"),
        ];
        for &(encoded, plain) in vectors.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    // Decoding: URL safe and standard spellings give the same bytes.
    #[test]
    fn test_from_base64_urlsafe() {
        let url_safe = "-_8".from_base64().unwrap();
        let standard = "+/8=".from_base64().unwrap();
        assert_eq!(url_safe, standard);
    }

    // Decoding: bytes outside the alphabet are rejected.
    #[test]
    fn test_from_base64_invalid_char() {
        for encoded in ["Zm$=", "Zg==$"].iter() {
            assert!(encoded.from_base64().is_err());
        }
    }

    // Decoding: a single data character cannot form a byte.
    #[test]
    fn test_from_base64_invalid_padding() {
        assert!("Z===".from_base64().is_err());
    }

    // Round trip: random inputs survive encoding followed by decoding.
    #[test]
    fn test_base64_random() {
        use rand::{thread_rng, Rng};

        for _ in 0..1000 {
            let len = thread_rng().gen_range(1, 100);
            let data = thread_rng().gen_iter::<u8>().take(len)
                                   .collect::<Vec<_>>();
            let round_trip = data.to_base64(STANDARD).from_base64().unwrap();
            assert_eq!(round_trip, data);
        }
    }
}