blob: 02e4773894fa830e8088229d5d384b5d5c7aeeb4 [file] [log] [blame]
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use {
crate::{
readers::Reader,
structs::{
BlockGroupDesc32, DirEntry2, EntryType, Extent, ExtentHeader, INode,
InvalidAddressErrorType, ParseToStruct, ParsingError, SuperBlock, FIRST_BG_PADDING,
MIN_EXT4_SIZE, ROOT_INODE_NUM,
},
},
fuchsia_vfs_pseudo_fs_mt::{
directory::immutable, file::pcb::asynchronous::read_only_const, tree_builder::TreeBuilder,
},
std::{
mem::size_of,
path::{Component, Path},
str,
sync::Arc,
},
};
// Assuming/ensuring that we are on a 64bit system where u64 == usize.
assert_eq_size!(u64, usize);
pub struct Parser<T: Reader> {
reader: Arc<T>,
super_block: Option<Arc<SuperBlock>>,
}
/// EXT4 Parser
///
/// Takes in a `Reader` that is able to read arbitrary chunks of data from the filesystem image.
///
/// Basic use:
/// let mut parser = Parser::new(VecReader::new(vec_of_u8));
/// let tree = parser.build_fuchsia_tree()
impl<T: 'static + Reader> Parser<T> {
pub fn new(reader: T) -> Self {
Parser { reader: Arc::new(reader), super_block: None }
}
/// Returns the Super Block.
///
/// If the super block has been parsed and saved before, return that.
/// Else, parse the super block and save it and return it.
///
/// We never need to re-parse the super block in this read-only
/// implementation.
fn super_block(&mut self) -> Result<Arc<SuperBlock>, ParsingError> {
// Neither Option helper works here:
// - `get_or_insert` will still call `SuperBlock::parse` first, and
// thus call every time.
// - `get_or_insert_with` will not be able to propagate a ParsingError
// back to here.
// Writing our own logic instead.
match &self.super_block {
Some(val) => Ok(val.clone()),
None => {
let sb = SuperBlock::parse(self.reader.clone())?;
self.super_block = Some(sb.clone());
Ok(sb)
}
}
}
/// Reads block size from the Super Block.
fn block_size(&mut self) -> Result<usize, ParsingError> {
self.super_block()?.block_size()
}
/// Reads full raw data from a given block number.
fn block(&mut self, block_number: u64) -> Result<Box<[u8]>, ParsingError> {
if block_number == 0 {
return Err(ParsingError::InvalidAddress(
InvalidAddressErrorType::Lower,
0,
FIRST_BG_PADDING,
));
}
let block_size = self.block_size()?;
let address = (block_number as usize)
.checked_mul(block_size)
.ok_or(ParsingError::BlockNumberOutOfBounds(block_number))?;
let mut data = vec![0u8; block_size];
self.reader.read(address, data.as_mut_slice()).map_err(Into::<ParsingError>::into)?;
Ok(data.into_boxed_slice())
}
/// Reads the INode at the given inode number.
fn inode(&mut self, inode_number: u32) -> Result<Arc<INode>, ParsingError> {
if inode_number < 1 {
// INode number 0 is not allowed per ext4 spec.
return Err(ParsingError::InvalidInode(inode_number));
}
let sb = self.super_block()?;
let block_size = self.block_size()?;
// The first Block Group starts with:
// - 1024 byte padding
// - 1024 byte Super Block
// Then in the next block, there are many blocks worth of Block Group Descriptors.
// If the block size is 2048 bytes or larger, then the 1024 byte padding, and the
// Super Block both fit in the first block (0), and the Block Group Descriptors start
// at block 1.
//
// A 1024 byte block size means the padding takes block 0 and the Super Block takes
// block 1. This means the Block Group Descriptors start in block 2.
let bg_descriptor_offset = if block_size >= MIN_EXT4_SIZE {
// Padding and Super Block both fit in the first block, so offset to the next
// block.
block_size
} else {
// Block size is less than 2048. The only valid block size smaller than 2048 is 1024.
// Padding and Super Block take one block each, so offset to the third block.
block_size * 2
};
// TODO(vfcc): Only checking the first block group.
// There are potentially N block groups.
let bgd = BlockGroupDesc32::parse_offset(
self.reader.clone(),
bg_descriptor_offset,
ParsingError::InvalidBlockGroupDesc(block_size),
)?;
// Offset could really be anywhere, and the Reader will enforce reading within the
// filesystem size. Not much can be checked here.
let inode_table_offset = (inode_number - 1) as usize % sb.e2fs_ipg.get() as usize
* sb.e2fs_inode_size.get() as usize;
let inode_addr = (bgd.ext2bgd_i_tables.get() as usize * block_size) + inode_table_offset;
if inode_addr < MIN_EXT4_SIZE {
return Err(ParsingError::InvalidAddress(
InvalidAddressErrorType::Lower,
inode_addr,
MIN_EXT4_SIZE,
));
}
INode::parse_offset(
self.reader.clone(),
inode_addr,
ParsingError::InvalidInode(inode_number),
)
}
/// Helper function to get the root directory INode.
fn root_inode(&mut self) -> Result<Arc<INode>, ParsingError> {
self.inode(ROOT_INODE_NUM)
}
/// Read all raw data from a given extent leaf node.
fn extent_data(
&mut self,
extent: &Extent,
mut allowance: usize,
) -> Result<Vec<u8>, ParsingError> {
let block_number = extent.target_block_num();
let block_count = extent.e_len.get();
let block_size = self.block_size()?;
let mut read_len;
let mut data = Vec::with_capacity(block_size * block_count as usize);
for i in 0..block_count {
let block_data = self.block(block_number + i as u64)?;
if allowance >= block_size {
read_len = block_size;
} else {
read_len = allowance;
}
let block_data = &block_data[0..read_len];
data.append(&mut block_data.to_vec());
allowance -= read_len;
}
Ok(data)
}
/// List of directory entries from the directory that is the given Inode.
///
/// Errors if the Inode does not map to a Directory.
///
/// Currently only supports a single block of DirEntrys, with only one extent entry.
fn entries_from_inode(
&mut self,
inode: Arc<INode>,
) -> Result<Vec<Arc<DirEntry2>>, ParsingError> {
let root_extent = inode.root_extent_header()?;
// TODO(vfcc): Will use entry_count when we support having N entries.
let block_size = self.block_size()?;
let mut entries = Vec::new();
match root_extent.eh_depth.get() {
0 => {
let offset = size_of::<ExtentHeader>();
let e_offset = offset + size_of::<ExtentHeader>();
if e_offset > inode.e2di_blocks.len() {
return Err(ParsingError::InvalidAddress(
InvalidAddressErrorType::Upper,
e_offset,
inode.e2di_blocks.len(),
));
}
let e = Extent::to_struct_ref(
&(inode.e2di_blocks)[offset..e_offset],
ParsingError::InvalidExtent(offset),
)?;
let mut index = 0usize;
let start_index = e.target_block_num() as usize * block_size;
// The `e2d_reclen` of the last entry will be large enough be approximately the
// end of the block.
while (index + size_of::<DirEntry2>()) < block_size {
let offset = index + start_index;
let de = DirEntry2::parse_offset(
self.reader.clone(),
offset,
ParsingError::InvalidDirEntry2(offset),
)?;
index += de.e2d_reclen.get() as usize;
entries.push(de);
}
Ok(entries)
}
_ => {
//TODO(vfcc): Support nested extent nodes.
return Err(ParsingError::Incompatible(
"Nested extents are not supported".to_string(),
));
}
}
}
/// Get any DirEntry2 that isn't root.
///
/// Root doesn't have a DirEntry2.
///
/// When dynamic loading of files is supported, this is the required mechanism.
pub fn entry_at_path(&mut self, path: &Path) -> Result<Arc<DirEntry2>, ParsingError> {
let root_inode = self.root_inode()?;
let root_entries = self.entries_from_inode(root_inode)?;
let mut entry_map = DirEntry2::as_hash_map(root_entries)?;
let mut components = path.components().peekable();
let mut component = components.next();
while component != None {
match component {
Some(Component::RootDir) => {
// Skip
}
Some(Component::Normal(name)) => {
let name = name.to_str().ok_or(ParsingError::InvalidInputPath)?;
if let Some(entry) = entry_map.remove(name) {
if components.peek() == None {
return Ok(entry);
}
match EntryType::from_u8(entry.e2d_type)? {
EntryType::Directory => {
let inode = self.inode(entry.e2d_ino.get())?;
entry_map =
DirEntry2::as_hash_map(self.entries_from_inode(inode)?)?;
}
_ => {
break;
}
}
}
}
_ => {
break;
}
}
component = components.next();
}
match path.to_str() {
Some(s) => Err(ParsingError::PathNotFound(s.to_string())),
None => Err(ParsingError::PathNotFound(
"Bad path - was not able to convert into string".to_string(),
)),
}
}
/// Read all raw data for a given file (directory entry).
///
/// Currently does not support extent trees, only basic "flat" trees.
pub fn read_file(&mut self, entry: Arc<DirEntry2>) -> Result<Vec<u8>, ParsingError> {
let inode = self.inode(entry.e2d_ino.get())?;
let root_extent = inode.root_extent_header()?;
let entry_count = root_extent.eh_ecount.get();
let mut size_remaining = inode.size();
let mut data = Vec::with_capacity(inode.size());
match root_extent.eh_depth.get() {
0 => {
for i in 0..entry_count {
let offset: usize =
size_of::<ExtentHeader>() + (size_of::<Extent>() * i as usize);
let e_offset = offset + size_of::<Extent>();
if e_offset > inode.e2di_blocks.len() {
return Err(ParsingError::InvalidAddress(
InvalidAddressErrorType::Upper,
e_offset,
inode.e2di_blocks.len(),
));
}
let e = Extent::to_struct_ref(
&(inode.e2di_blocks)[offset..e_offset],
ParsingError::InvalidExtent(offset),
)?;
let mut extent_data = self.extent_data(e, size_remaining)?;
let extent_len = extent_data.len();
if extent_len > size_remaining {
return Err(ParsingError::ExtentUnexpectedLength(
extent_len,
size_remaining,
));
}
size_remaining -= extent_data.len();
data.append(&mut extent_data);
}
}
_ => {
//TODO(vfcc): Support nested extent nodes.
return Err(ParsingError::Incompatible(
"Nested extents are not supported".to_string(),
));
}
};
Ok(data)
}
/// Progress through the entire directory tree starting from the given INode.
///
/// If given the root directory INode, this will process through every directory entry in the
/// filesystem in a DFS manner.
///
/// Takes in a closure that will be called for each entry found.
/// Closure should return `Ok(true)` in order to continue the process, otherwise the process
/// will stop.
///
/// Returns Ok(true) if it has indexed its subtree successfully. Otherwise, if the receiver
/// chooses to cancel indexing early, an Ok(false) is returned and propagated up.
pub fn index<R>(
&mut self,
inode: Arc<INode>,
prefix: Vec<&str>,
receiver: &mut R,
) -> Result<bool, ParsingError>
where
R: FnMut(&mut Parser<T>, Vec<&str>, Arc<DirEntry2>) -> Result<bool, ParsingError>,
{
let entries = self.entries_from_inode(inode)?;
for entry in entries {
let entry_name = entry.name()?;
if entry_name == "." || entry_name == ".." {
continue;
}
let mut name = Vec::new();
name.append(&mut prefix.clone());
name.push(entry_name);
if !receiver(self, name.clone(), entry.clone())? {
return Ok(false);
}
if EntryType::from_u8(entry.e2d_type)? == EntryType::Directory {
let inode = self.inode(entry.e2d_ino.get())?;
if !self.index(inode, name, receiver)? {
return Ok(false);
}
}
}
Ok(true)
}
/// Returns a `Simple` filesystem as built by `TreeBuilder.build()`.
pub fn build_fuchsia_tree(&mut self) -> Result<Arc<immutable::Simple>, ParsingError> {
let root_inode = self.root_inode()?;
let mut tree = TreeBuilder::empty_dir();
self.index(root_inode, Vec::new(), &mut |my_self, path, entry| {
let entry_type = EntryType::from_u8(entry.e2d_type)?;
match entry_type {
EntryType::RegularFile => {
let data = my_self.read_file(entry)?;
tree.add_entry(path.clone(), read_only_const(data.clone()))
.map_err(|_| ParsingError::BadFile(path.join("/")))?;
}
EntryType::Directory => {
tree.add_empty_dir(path.clone())
.map_err(|_| ParsingError::BadDirectory(path.join("/")))?;
}
_ => {
// TODO(vfcc): Handle other types.
}
}
Ok(true)
})?;
Ok(tree.build())
}
}
#[cfg(test)]
mod tests {
use {
crate::{readers::VecReader, structs::EntryType},
crypto::{digest::Digest, sha2::Sha256},
std::{collections::HashSet, fs, path::Path, str},
};
#[test]
fn list_root_1_file() {
let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let root_inode = parser.root_inode().expect("Parse INode");
let entries = parser.entries_from_inode(root_inode).expect("List entries");
let mut expected_entries = vec!["file1", "lost+found", "..", "."];
for de in &entries {
assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
}
assert_eq!(expected_entries.len(), 0);
}
#[test]
fn list_root() {
let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let root_inode = parser.root_inode().expect("Parse INode");
let entries = parser.entries_from_inode(root_inode).expect("List entries");
let mut expected_entries = vec!["inner", "file1", "lost+found", "..", "."];
for de in &entries {
assert_eq!(expected_entries.pop().unwrap(), de.name().unwrap());
}
assert_eq!(expected_entries.len(), 0);
}
#[test]
fn get_from_path() {
let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let entry = parser.entry_at_path(Path::new("/inner")).expect("Entry at path");
assert_eq!(entry.e2d_ino.get(), 12);
assert_eq!(entry.name().unwrap(), "inner");
let entry = parser.entry_at_path(Path::new("/inner/file2")).expect("Entry at path");
assert_eq!(entry.e2d_ino.get(), 17);
assert_eq!(entry.name().unwrap(), "file2");
}
#[test]
fn read_file() {
let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let entry = parser.entry_at_path(Path::new("file1")).expect("Entry at path");
assert_eq!(entry.e2d_ino.get(), 15);
assert_eq!(entry.name().unwrap(), "file1");
let data = parser.read_file(entry).expect("File data");
let compare = "file1 contents.\n";
assert_eq!(data.len(), compare.len());
assert_eq!(str::from_utf8(data.as_slice()).expect("File data"), compare);
}
#[test]
fn fail_inode_zero() {
let data = fs::read("/pkg/data/1file.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.inode(0).is_err());
}
#[test]
fn index() {
let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let mut count = 0;
let mut entries: HashSet<u32> = HashSet::new();
let root_inode = parser.root_inode().expect("Root inode");
parser
.index(root_inode, Vec::new(), &mut |_, _, entry| {
count += 1;
// Make sure each inode only appears once.
assert_ne!(entries.contains(&entry.e2d_ino.get()), true);
entries.insert(entry.e2d_ino.get());
Ok(true)
})
.expect("Index");
assert_eq!(count, 4);
}
#[test]
fn check_data() {
let data = fs::read("/pkg/data/nest.img").expect("Unable to read file");
let mut parser = super::Parser::new(VecReader::new(data));
assert!(parser.super_block().expect("Super Block").check_magic().is_ok());
let root_inode = parser.root_inode().expect("Root inode");
parser
.index(root_inode, Vec::new(), &mut |my_self, path, entry| {
let entry_type = EntryType::from_u8(entry.e2d_type).expect("Entry Type");
match entry_type {
EntryType::RegularFile => {
let data = my_self.read_file(entry).expect("File data");
let mut hasher = Sha256::new();
hasher.input(&data);
if path == vec!["inner", "file2"] {
assert_eq!(
"215ca145cbac95c9e2a6f5ff91ca1887c837b18e5f58fd2a7a16e2e5a3901e10",
hasher.result_str()
);
} else if path == vec!["file1"] {
assert_eq!(
"6bc35bfb2ca96c75a1fecde205693c19a827d4b04e90ace330048f3e031487dd",
hasher.result_str()
);
} else {
assert!(false, "Got an invalid file.");
}
}
EntryType::Directory => {
if path != vec!["inner"] && path != vec!["lost+found"] {
// These should be the only possible directories.
assert!(false, format!("Unexpected path {:?}", path));
}
}
_ => {
assert!(false, "No other types should exist in this image.");
}
}
Ok(true)
})
.expect("Index");
}
}