| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| //! # Reading, Writing and Listing Fuchsia Archives (FAR) Data |
| //! |
| //! This crate is a Rust port of the |
| //! [Go Far package](https://fuchsia.googlesource.com/fuchsia/+/HEAD/garnet/go/src/far/). |
| //! |
| //! # Example |
| //! |
| //! ``` |
| //! use anyhow::Error; |
| //! use std::collections::BTreeMap; |
| //! use std::fs; |
| //! use std::io::{Cursor, Read, Write}; |
| //! use tempfile::TempDir; |
| //! |
| //! fn create_test_files(file_names: &[&str]) -> Result<TempDir, Error> { |
| //! let tmp_dir = TempDir::new()?; |
| //! for file_name in file_names { |
| //! let file_path = tmp_dir.path().join(file_name); |
| //! let parent_dir = file_path.parent().unwrap(); |
| //! fs::create_dir_all(&parent_dir)?; |
| //! let file_path = tmp_dir.path().join(file_name); |
| //! let mut tmp_file = fs::File::create(&file_path)?; |
| //! writeln!(tmp_file, "{}", file_name)?; |
| //! } |
| //! Ok(tmp_dir) |
| //! } |
| //! |
| //! let file_names = ["b", "a", "dir/c"]; |
| //! let test_dir = create_test_files(&file_names).unwrap(); |
| //! let mut path_content_map: BTreeMap<&str, (u64, Box<dyn Read>)> = BTreeMap::new(); |
| //! for file_name in file_names.iter() { |
| //! let file = fs::File::open(test_dir.path().join(file_name)).unwrap(); |
| //! path_content_map.insert(file_name, (file.metadata().unwrap().len(), Box::new(file))); |
| //! } |
| //! let mut result = Vec::new(); |
| //! fuchsia_archive::write(&mut result, path_content_map).unwrap(); |
| //! let result = &result[..]; |
| //! |
| //! let reader = fuchsia_archive::Reader::new(Cursor::new(result)).unwrap(); |
| //! let entries = reader.list().map(|e| e.path()).collect::<Vec<_>>(); |
| //! assert_eq!(entries, ["a", "b", "dir/c"]); |
| //! ``` |
| |
| #![allow(clippy::let_unit_value)] |
| // TODO(https://fxbug.dev/42073005): Remove this allow once the lint is fixed. |
| #![allow(unknown_lints, clippy::extra_unused_type_parameters)] |
| |
| use zerocopy::byteorder::little_endian::{U16, U32, U64}; |
| |
| mod error; |
| pub use error::Error; |
| |
| mod name; |
| |
| mod read; |
| pub use read::Reader; |
| |
| mod utf8_reader; |
| pub use utf8_reader::Utf8Reader; |
| |
| mod async_read; |
| pub use async_read::AsyncReader; |
| |
| mod async_utf8_reader; |
| pub use async_utf8_reader::AsyncUtf8Reader; |
| |
| mod write; |
| pub use write::write; |
| |
| pub const MAGIC_INDEX_VALUE: [u8; 8] = [0xc8, 0xbf, 0x0b, 0x48, 0xad, 0xab, 0xc5, 0x11]; |
| |
| pub type ChunkType = [u8; 8]; |
| |
| pub const DIR_CHUNK_TYPE: ChunkType = *b"DIR-----"; |
| pub const DIR_NAMES_CHUNK_TYPE: ChunkType = *b"DIRNAMES"; |
| |
| #[derive( |
| PartialEq, |
| Eq, |
| Debug, |
| Clone, |
| Copy, |
| Default, |
| zerocopy::AsBytes, |
| zerocopy::FromZeros, |
| zerocopy::FromBytes, |
| zerocopy::NoCell, |
| )] |
| #[repr(C)] |
| struct Index { |
| magic: [u8; 8], |
| length: U64, |
| } |
| |
| const INDEX_LEN: u64 = std::mem::size_of::<Index>() as u64; |
| |
| #[derive( |
| PartialEq, |
| Eq, |
| Debug, |
| Clone, |
| Copy, |
| Default, |
| zerocopy::AsBytes, |
| zerocopy::FromZeros, |
| zerocopy::FromBytes, |
| zerocopy::NoCell, |
| )] |
| #[repr(C)] |
| struct IndexEntry { |
| chunk_type: ChunkType, |
| offset: U64, |
| length: U64, |
| } |
| |
| const INDEX_ENTRY_LEN: u64 = std::mem::size_of::<IndexEntry>() as u64; |
| |
| #[derive( |
| PartialEq, |
| Eq, |
| Debug, |
| Clone, |
| Copy, |
| Default, |
| zerocopy::AsBytes, |
| zerocopy::FromZeros, |
| zerocopy::FromBytes, |
| zerocopy::NoCell, |
| )] |
| #[repr(C)] |
| struct DirectoryEntry { |
| name_offset: U32, |
| name_length: U16, |
| reserved: U16, |
| data_offset: U64, |
| data_length: U64, |
| reserved2: U64, |
| } |
| |
| const DIRECTORY_ENTRY_LEN: u64 = std::mem::size_of::<DirectoryEntry>() as u64; |
| const CONTENT_ALIGNMENT: u64 = 4096; |
| |
| /// An entry in an archive, returned by Reader::list |
| #[derive(Debug, PartialEq, Eq)] |
| pub struct Entry<'a> { |
| path: &'a [u8], |
| offset: u64, |
| length: u64, |
| } |
| |
| impl<'a> Entry<'a> { |
| /// The path of the entry. |
| pub fn path(&self) -> &'a [u8] { |
| self.path |
| } |
| |
| /// The offset in bytes of the entry's content chunk. |
| pub fn offset(&self) -> u64 { |
| self.offset |
| } |
| |
| /// The length in bytes of the entry's content chunk. |
| pub fn length(&self) -> u64 { |
| self.length |
| } |
| } |
| |
| /// An entry in a UTF-8 archive, returned by Reader::list |
| #[derive(Debug, PartialEq, Eq)] |
| pub struct Utf8Entry<'a> { |
| path: &'a str, |
| offset: u64, |
| length: u64, |
| } |
| |
| impl<'a> Utf8Entry<'a> { |
| /// The path of the entry. |
| pub fn path(&self) -> &'a str { |
| self.path |
| } |
| |
| /// The offset in bytes of the entry's content chunk. |
| pub fn offset(&self) -> u64 { |
| self.offset |
| } |
| |
| /// The length in bytes of the entry's content chunk. |
| pub fn length(&self) -> u64 { |
| self.length |
| } |
| } |
| |
| fn validate_directory_entries_and_paths( |
| directory_entries: &[DirectoryEntry], |
| path_data: &[u8], |
| stream_len: u64, |
| end_of_last_non_content_chunk: u64, |
| ) -> Result<(), Error> { |
| let mut previous_name: Option<&[u8]> = None; |
| let mut previous_entry: Option<&DirectoryEntry> = None; |
| for (i, entry) in directory_entries.iter().enumerate() { |
| let name = validate_name_for_entry(entry, i, path_data, previous_name)?; |
| let () = validate_content_chunk( |
| entry, |
| previous_entry, |
| name, |
| stream_len, |
| end_of_last_non_content_chunk, |
| )?; |
| previous_name = Some(name); |
| previous_entry = Some(entry); |
| } |
| Ok(()) |
| } |
| |
| // Obtain name for current directory entry, making sure it is a valid name and lexicographically |
| // greater than the previous name. |
| fn validate_name_for_entry<'a>( |
| entry: &DirectoryEntry, |
| entry_index: usize, |
| path_data: &'a [u8], |
| previous_name: Option<&[u8]>, |
| ) -> Result<&'a [u8], Error> { |
| let offset = entry.name_offset.get().into_usize(); |
| if offset >= path_data.len() { |
| return Err(Error::PathDataOffsetTooLarge { |
| entry_index, |
| offset, |
| chunk_size: path_data.len(), |
| }); |
| } |
| |
| let end = offset + usize::from(entry.name_length.get()); |
| if end > path_data.len() { |
| return Err(Error::PathDataLengthTooLarge { |
| entry_index, |
| offset, |
| length: entry.name_length.get(), |
| chunk_size: path_data.len(), |
| }); |
| } |
| |
| let name = crate::name::validate_name(&path_data[offset..end])?; |
| |
| // Directory entries must be strictly increasing by name |
| if let Some(previous_name) = previous_name { |
| if previous_name >= name { |
| return Err(Error::DirectoryEntriesOutOfOrder { |
| entry_index, |
| previous_name: previous_name.into(), |
| name: name.into(), |
| }); |
| } |
| } |
| Ok(name) |
| } |
| |
| fn validate_content_chunk( |
| entry: &DirectoryEntry, |
| previous_entry: Option<&DirectoryEntry>, |
| name: &[u8], |
| stream_len: u64, |
| end_of_last_non_content_chunk: u64, |
| ) -> Result<(), Error> { |
| // Chunks must be non-overlapping and tightly packed |
| let expected_offset = if let Some(previous_entry) = previous_entry { |
| // Both the addition and rounding were checked when the previous entry was validated. |
| (previous_entry.data_offset.get() + previous_entry.data_length.get()) |
| .next_multiple_of(CONTENT_ALIGNMENT) |
| } else { |
| end_of_last_non_content_chunk |
| .checked_next_multiple_of(CONTENT_ALIGNMENT) |
| .ok_or(Error::ContentChunkOffsetOverflow)? |
| }; |
| if entry.data_offset.get() != expected_offset { |
| return Err(Error::InvalidContentChunkOffset { |
| name: name.into(), |
| expected: expected_offset, |
| actual: entry.data_offset.get(), |
| }); |
| } |
| |
| // Chunks must be contained in the archive |
| let stream_len_lower_bound = entry |
| .data_offset |
| .get() |
| .checked_add(entry.data_length.get()) |
| .and_then(|end| end.checked_next_multiple_of(CONTENT_ALIGNMENT)) |
| .ok_or_else(|| Error::ContentChunkEndOverflow { |
| name: name.into(), |
| offset: entry.data_offset.get(), |
| length: entry.data_length.get(), |
| })?; |
| if stream_len_lower_bound > stream_len { |
| return Err(Error::ContentChunkBeyondArchive { |
| name: name.into(), |
| lower_bound: stream_len_lower_bound, |
| archive_size: stream_len, |
| }); |
| } |
| Ok(()) |
| } |
| |
| // Return an iterator over the items in an archive. |
| fn list<'a>( |
| directory_entries: &'a [DirectoryEntry], |
| path_data: &'a [u8], |
| ) -> impl ExactSizeIterator<Item = Entry<'a>> { |
| directory_entries.iter().map(|e| Entry { |
| path: &path_data[e.name_offset.get().into_usize()..][..usize::from(e.name_length.get())], |
| offset: e.data_offset.get(), |
| length: e.data_length.get(), |
| }) |
| } |
| |
| // Returns the directory entry with path `target_path`, or an error if there is not one. |
| // O(log(# directory entries)) |
| fn find_directory_entry<'a>( |
| directory_entries: &'a [DirectoryEntry], |
| path_data: &'_ [u8], |
| target_path: &'_ [u8], |
| ) -> Result<&'a DirectoryEntry, Error> { |
| // FAR spec requires, and [Async]Reader::new enforces, that directory entries are sorted by |
| // path data |
| // https://fuchsia.dev/fuchsia-src/development/source_code/archive_format?hl=en#directory_chunk_type_dir----- |
| let i = directory_entries |
| .binary_search_by_key(&target_path, |e| { |
| &path_data[e.name_offset.get().into_usize()..][..usize::from(e.name_length.get())] |
| }) |
| .map_err(|_| Error::PathNotPresent(target_path.into()))?; |
| Ok(directory_entries.get(i).expect("binary_search on success returns in-bounds index")) |
| } |
| |
| trait SafeIntegerConversion { |
| fn into_usize(self) -> usize; |
| } |
| |
| impl SafeIntegerConversion for u32 { |
| fn into_usize(self) -> usize { |
| static_assertions::const_assert!( |
| std::mem::size_of::<u32>() <= std::mem::size_of::<usize>() |
| ); |
| self as usize |
| } |
| } |
| |
| #[cfg(test)] |
| pub(crate) mod tests { |
| use { |
| super::*, |
| std::io::{Cursor, Read as _, Seek as _, SeekFrom, Write as _}, |
| zerocopy::AsBytes as _, |
| }; |
| |
| pub(crate) fn example_archive() -> Vec<u8> { |
| let mut b: Vec<u8> = vec![0; 16384]; |
| #[rustfmt::skip] |
| let header = vec![ |
| /* magic */ |
| 0xc8, 0xbf, 0x0b, 0x48, 0xad, 0xab, 0xc5, 0x11, |
| /* length of index entries */ |
| 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* index entry for directory chunk */ |
| /* chunk type */ |
| 0x44, 0x49, 0x52, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, |
| /* offset to chunk */ |
| 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* length of chunk */ |
| 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* index entry for directory names chunk */ |
| /* chunk type */ |
| 0x44, 0x49, 0x52, 0x4e, 0x41, 0x4d, 0x45, 0x53, |
| /* offset to chunk */ |
| 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* length of chunk */ |
| 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* directory chunk */ |
| /* directory table entry for path "a" */ |
| 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, |
| 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* directory table entry for path "b" */ |
| 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, |
| 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* directory table entry for path "dir/c" */ |
| 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, |
| 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| /* directory names chunk with one byte of padding */ |
| b'a', b'b', b'd', b'i', b'r', b'/', b'c', 0x00, |
| ]; |
| b[0..header.len()].copy_from_slice(header.as_slice()); |
| let content_a = b"a\n"; |
| let a_loc = 4096; |
| b[a_loc..a_loc + content_a.len()].copy_from_slice(content_a); |
| let content_b = b"b\n"; |
| let b_loc = 8192; |
| b[b_loc..b_loc + content_b.len()].copy_from_slice(content_b); |
| let content_c = b"dir/c\n"; |
| let c_loc = 12288; |
| b[c_loc..c_loc + content_c.len()].copy_from_slice(content_c); |
| b |
| } |
| |
| #[test] |
| fn test_serialize_deserialize_index() { |
| let mut target = Cursor::new(Vec::new()); |
| let index = Index { magic: MAGIC_INDEX_VALUE, length: (2 * INDEX_ENTRY_LEN).into() }; |
| let () = target.write_all(index.as_bytes()).unwrap(); |
| assert_eq!(target.get_ref().len() as u64, INDEX_LEN); |
| assert_eq!(target.seek(SeekFrom::Start(0)).unwrap(), 0); |
| |
| let mut decoded_index = Index::default(); |
| let () = target.get_ref().as_slice().read_exact(decoded_index.as_bytes_mut()).unwrap(); |
| assert_eq!(index, decoded_index); |
| } |
| |
| #[test] |
| fn test_serialize_deserialize_index_entry() { |
| let mut target = Cursor::new(Vec::new()); |
| let index_entry = |
| IndexEntry { chunk_type: DIR_CHUNK_TYPE, offset: 999.into(), length: 444.into() }; |
| let () = target.write_all(index_entry.as_bytes()).unwrap(); |
| assert_eq!(target.get_ref().len() as u64, INDEX_ENTRY_LEN); |
| assert_eq!(target.seek(SeekFrom::Start(0)).unwrap(), 0); |
| |
| let mut decoded_index_entry = IndexEntry::default(); |
| let () = |
| target.get_ref().as_slice().read_exact(decoded_index_entry.as_bytes_mut()).unwrap(); |
| assert_eq!(index_entry, decoded_index_entry); |
| } |
| |
| #[test] |
| fn test_serialize_deserialize_directory_entry() { |
| let mut target = Cursor::new(Vec::new()); |
| let directory_entry = DirectoryEntry { |
| name_offset: 33.into(), |
| name_length: 66.into(), |
| reserved: 0.into(), |
| data_offset: 99.into(), |
| data_length: 1011.into(), |
| reserved2: 0.into(), |
| }; |
| let () = target.write_all(directory_entry.as_bytes()).unwrap(); |
| assert_eq!(target.get_ref().len() as u64, DIRECTORY_ENTRY_LEN); |
| assert_eq!(target.seek(SeekFrom::Start(0)).unwrap(), 0); |
| |
| let mut decoded_directory_entry = DirectoryEntry::default(); |
| let () = |
| target.get_ref().as_slice().read_exact(decoded_directory_entry.as_bytes_mut()).unwrap(); |
| assert_eq!(directory_entry, decoded_directory_entry); |
| } |
| |
| #[test] |
| fn test_struct_sizes() { |
| assert_eq!(INDEX_LEN, 8 + 8); |
| assert_eq!(INDEX_ENTRY_LEN, 8 + 8 + 8); |
| assert_eq!(DIRECTORY_ENTRY_LEN, 4 + 2 + 2 + 8 + 8 + 8); |
| } |
| |
| #[test] |
| fn into_usize_no_panic() { |
| assert_eq!(u32::MAX.into_usize(), u32::MAX.try_into().unwrap()); |
| } |
| } |