blob: bfeeb9d09f33aea0a08d18d1ed5bb161817837db [file] [log] [blame]
// Copyright 2016 `multipart` Crate Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! Utilities for saving request entries to the filesystem.
use mime::Mime;
use super::field::{MultipartData, MultipartFile, ReadEntry, ReadEntryResult};
use self::SaveResult::*;
pub use tempdir::TempDir;
use std::collections::HashMap;
use std::io::prelude::*;
use std::fs::OpenOptions;
use std::path::{Path, PathBuf};
use std::{env, fs, io, mem};
const RANDOM_FILENAME_LEN: usize = 12;
fn rand_filename() -> String {
::random_alphanumeric(RANDOM_FILENAME_LEN)
}
macro_rules! try_start (
($try:expr) => (
match $try {
Ok(val) => val,
Err(e) => return SaveResult::Error(e),
}
)
);
/// A builder for saving a file or files to the local filesystem.
///
/// ### `OpenOptions`
/// This builder holds an instance of `std::fs::OpenOptions` which is used
/// when creating the new file(s).
///
/// By default, the open options are set with `.write(true).create_new(true)`,
/// so if the file already exists then an error will be thrown. This is to avoid accidentally
/// overwriting files from other requests.
///
/// If you want to modify the options used to open the save file, you can use
/// `mod_open_opts()`.
///
/// ### File Size and Count Limits
/// You can set a size limit for individual files with `size_limit()`, which takes either `u64`
/// or `Option<u64>`.
///
/// You can also set the maximum number of files to process with `count_limit()`, which
/// takes either `u32` or `Option<u32>`. This only has an effect when using
/// `SaveBuilder<[&mut] Multipart>`.
///
/// ### Warning: Do **not** trust user input!
/// It is a serious security risk to create files or directories with paths based on user input.
/// A malicious user could craft a path which can be used to overwrite important files, such as
/// web templates, static assets, Javascript files, database files, configuration files, etc.,
/// if they are writable by the server process.
///
/// This can be mitigated somewhat by setting filesystem permissions as
/// conservatively as possible and running the server under its own user with restricted
/// permissions, but you should still not use user input directly as filesystem paths.
/// If it is truly necessary, you should sanitize user input such that it cannot cause a path to be
/// misinterpreted by the OS. Such functionality is outside the scope of this crate.
#[must_use = "nothing saved to the filesystem yet"]
pub struct SaveBuilder<S> {
savable: S,
open_opts: OpenOptions,
size_limit: Option<u64>,
count_limit: Option<u32>,
}
impl<S> SaveBuilder<S> {
/// Implementation detail but not problematic to have accessible.
#[doc(hidden)]
pub fn new(savable: S) -> SaveBuilder<S> {
let mut open_opts = OpenOptions::new();
open_opts.write(true).create_new(true);
SaveBuilder {
savable: savable,
open_opts: open_opts,
size_limit: None,
count_limit: None,
}
}
/// Set the maximum number of bytes to write out *per file*.
///
/// Can be `u64` or `Option<u64>`. If `None`, clears the limit.
pub fn size_limit<L: Into<Option<u64>>>(mut self, limit: L) -> Self {
self.size_limit = limit.into();
self
}
/// Modify the `OpenOptions` used to open any files for writing.
///
/// The `write` flag will be reset to `true` after the closure returns. (It'd be pretty
/// pointless otherwise, right?)
pub fn mod_open_opts<F: FnOnce(&mut OpenOptions)>(mut self, opts_fn: F) -> Self {
opts_fn(&mut self.open_opts);
self.open_opts.write(true);
self
}
}
/// Save API for whole multipart requests.
impl<M> SaveBuilder<M> where M: ReadEntry {
/// Set the maximum number of files to write out.
///
/// Can be `u32` or `Option<u32>`. If `None`, clears the limit.
pub fn count_limit<L: Into<Option<u32>>>(mut self, count_limit: L) -> Self {
self.count_limit = count_limit.into();
self
}
/// Save the file fields in the request to a new temporary directory prefixed with
/// `multipart-rs` in the OS temporary directory.
///
/// For more options, create a `TempDir` yourself and pass it to `with_temp_dir()` instead.
///
/// ### Note: Temporary
/// See `SaveDir` for more info (the type of `Entries::save_dir`).
pub fn temp(self) -> EntriesSaveResult<M> {
self.temp_with_prefix("multipart-rs")
}
/// Save the file fields in the request to a new temporary directory with the given string
/// as a prefix in the OS temporary directory.
///
/// For more options, create a `TempDir` yourself and pass it to `with_temp_dir()` instead.
///
/// ### Note: Temporary
/// See `SaveDir` for more info (the type of `Entries::save_dir`).
pub fn temp_with_prefix(self, prefix: &str) -> EntriesSaveResult<M> {
match TempDir::new(prefix) {
Ok(tempdir) => self.with_temp_dir(tempdir),
Err(e) => SaveResult::Error(e),
}
}
/// Save the file fields to the given `TempDir`.
///
/// The `TempDir` is returned in the result under `Entries::save_dir`.
pub fn with_temp_dir(self, tempdir: TempDir) -> EntriesSaveResult<M> {
self.with_entries(Entries::new(SaveDir::Temp(tempdir)))
}
/// Save the file fields in the request to a new permanent directory with the given path.
///
/// Any nonexistent directories in the path will be created.
pub fn with_dir<P: Into<PathBuf>>(self, dir: P) -> EntriesSaveResult<M> {
let dir = dir.into();
try_start!(create_dir_all(&dir));
self.with_entries(Entries::new(SaveDir::Perm(dir.into())))
}
/// Commence the save operation using the existing `Entries` instance.
///
/// May be used to resume a saving operation after handling an error.
pub fn with_entries(mut self, mut entries: Entries) -> EntriesSaveResult<M> {
let mut count = 0;
loop {
let field = match ReadEntry::read_entry(self.savable) {
ReadEntryResult::Entry(field) => field,
ReadEntryResult::End(_) => break,
ReadEntryResult::Error(_, e) => return Partial (
PartialEntries {
entries: entries,
partial_file: None,
},
e.into(),
)
};
match field.data {
MultipartData::File(mut file) => {
match self.count_limit {
Some(limit) if count >= limit => return Partial (
PartialEntries {
entries: entries,
partial_file: Some(PartialFileField {
field_name: field.name,
source: file,
dest: None,
})
},
PartialReason::CountLimit,
),
_ => (),
}
count += 1;
match file.save().size_limit(self.size_limit).with_dir(&entries.save_dir) {
Full(saved_file) => {
self.savable = file.take_inner();
entries.mut_files_for(field.name).push(saved_file);
},
Partial(partial, reason) => return Partial(
PartialEntries {
entries: entries,
partial_file: Some(PartialFileField {
field_name: field.name,
source: file,
dest: Some(partial)
})
},
reason
),
Error(e) => return Partial(
PartialEntries {
entries: entries,
partial_file: Some(PartialFileField {
field_name: field.name,
source: file,
dest: None,
}),
},
e.into(),
),
}
},
MultipartData::Text(mut text) => {
self.savable = text.take_inner();
entries.fields.insert(field.name, text.text);
},
}
}
SaveResult::Full(entries)
}
}
/// Save API for individual files.
impl<'m, M: 'm> SaveBuilder<&'m mut MultipartFile<M>> where MultipartFile<M>: BufRead {
/// Save to a file with a random alphanumeric name in the OS temporary directory.
///
/// Does not use user input to create the path.
///
/// See `with_path()` for more details.
pub fn temp(&mut self) -> FileSaveResult {
let path = env::temp_dir().join(rand_filename());
self.with_path(path)
}
/// Save to a file with the given name in the OS temporary directory.
///
/// See `with_path()` for more details.
///
/// ### Warning: Do **not* trust user input!
/// It is a serious security risk to create files or directories with paths based on user input.
/// A malicious user could craft a path which can be used to overwrite important files, such as
/// web templates, static assets, Javascript files, database files, configuration files, etc.,
/// if they are writable by the server process.
///
/// This can be mitigated somewhat by setting filesystem permissions as
/// conservatively as possible and running the server under its own user with restricted
/// permissions, but you should still not use user input directly as filesystem paths.
/// If it is truly necessary, you should sanitize filenames such that they cannot be
/// misinterpreted by the OS.
pub fn with_filename(&mut self, filename: &str) -> FileSaveResult {
let mut tempdir = env::temp_dir();
tempdir.set_file_name(filename);
self.with_path(tempdir)
}
/// Save to a file with a random alphanumeric name in the given directory.
///
/// See `with_path()` for more details.
///
/// ### Warning: Do **not* trust user input!
/// It is a serious security risk to create files or directories with paths based on user input.
/// A malicious user could craft a path which can be used to overwrite important files, such as
/// web templates, static assets, Javascript files, database files, configuration files, etc.,
/// if they are writable by the server process.
///
/// This can be mitigated somewhat by setting filesystem permissions as
/// conservatively as possible and running the server under its own user with restricted
/// permissions, but you should still not use user input directly as filesystem paths.
/// If it is truly necessary, you should sanitize filenames such that they cannot be
/// misinterpreted by the OS.
pub fn with_dir<P: AsRef<Path>>(&mut self, dir: P) -> FileSaveResult {
let path = dir.as_ref().join(rand_filename());
self.with_path(path)
}
/// Save to a file with the given path.
///
/// Creates any missing directories in the path.
/// Uses the contained `OpenOptions` to create the file.
/// Truncates the file to the given limit, if set.
pub fn with_path<P: Into<PathBuf>>(&mut self, path: P) -> FileSaveResult {
let path = path.into();
let saved = SavedFile {
content_type: self.savable.content_type.clone(),
filename: self.savable.filename.clone(),
path: path,
size: 0,
};
let file = match create_dir_all(&saved.path).and_then(|_| self.open_opts.open(&saved.path)) {
Ok(file) => file,
Err(e) => return Partial(saved, e.into())
};
self.write_to(file).map(move |written| saved.with_size(written))
}
/// Write out the file field to `dest`, truncating if a limit was set.
///
/// Returns the number of bytes copied, and whether or not the limit was reached
/// (tested by `MultipartFile::fill_buf().is_empty()` so no bytes are consumed).
///
/// Retries on interrupts.
pub fn write_to<W: Write>(&mut self, mut dest: W) -> SaveResult<u64, u64> {
if let Some(limit) = self.size_limit {
let copied = match try_copy_buf(self.savable.take(limit), &mut dest) {
Full(copied) => copied,
other => return other,
};
// If there's more data to be read, the field was truncated
match self.savable.fill_buf() {
Ok(buf) if buf.is_empty() => Full(copied),
Ok(_) => Partial(copied, PartialReason::SizeLimit),
Err(e) => Partial(copied, PartialReason::IoError(e))
}
} else {
try_copy_buf(&mut self.savable, &mut dest)
}
}
}
/// A file saved to the local filesystem from a multipart request.
#[derive(Debug)]
pub struct SavedFile {
/// The complete path this file was saved at.
pub path: PathBuf,
/// ### Warning: Client Provided / Untrustworthy
/// You should treat this value as **untrustworthy** because it is an arbitrary string
/// provided by the client.
///
/// It is a serious security risk to create files or directories with paths based on user input.
/// A malicious user could craft a path which can be used to overwrite important files, such as
/// web templates, static assets, Javascript files, database files, configuration files, etc.,
/// if they are writable by the server process.
///
/// This can be mitigated somewhat by setting filesystem permissions as
/// conservatively as possible and running the server under its own user with restricted
/// permissions, but you should still not use user input directly as filesystem paths.
/// If it is truly necessary, you should sanitize filenames such that they cannot be
/// misinterpreted by the OS. Such functionality is outside the scope of this crate.
pub filename: Option<String>,
/// The MIME type (`Content-Type` value) of this file, if supplied by the client,
/// or `"applicaton/octet-stream"` otherwise.
///
/// ### Note: Client Provided
/// Consider this value to be potentially untrustworthy, as it is provided by the client.
/// It may be inaccurate or entirely wrong, depending on how the client determined it.
///
/// Some variants wrap arbitrary strings which could be abused by a malicious user if your
/// application performs any non-idempotent operations based on their value, such as
/// starting another program or querying/updating a database (web-search "SQL injection").
pub content_type: Mime,
/// The number of bytes written to the disk.
pub size: u64,
}
impl SavedFile {
fn with_size(self, size: u64) -> Self {
SavedFile { size: size, .. self }
}
}
/// A result of `Multipart::save_all()`.
#[derive(Debug)]
pub struct Entries {
/// The text fields of the multipart request, mapped by field name -> value.
pub fields: HashMap<String, String>,
/// A map of file field names to their contents saved on the filesystem.
pub files: HashMap<String, Vec<SavedFile>>,
/// The directory the files in this request were saved under; may be temporary or permanent.
pub save_dir: SaveDir,
}
impl Entries {
fn new(save_dir: SaveDir) -> Self {
Entries {
fields: HashMap::new(),
files: HashMap::new(),
save_dir: save_dir,
}
}
/// Returns `true` if both `fields` and `files` are empty, `false` otherwise.
pub fn is_empty(&self) -> bool {
self.fields.is_empty() && self.files.is_empty()
}
fn mut_files_for(&mut self, field: String) -> &mut Vec<SavedFile> {
self.files.entry(field).or_insert_with(Vec::new)
}
}
/// The save directory for `Entries`. May be temporary (delete-on-drop) or permanent.
#[derive(Debug)]
pub enum SaveDir {
/// This directory is temporary and will be deleted, along with its contents, when this wrapper
/// is dropped.
Temp(TempDir),
/// This directory is permanent and will be left on the filesystem when this wrapper is dropped.
///
/// **N.B.** If this directory is in the OS temporary directory then it may still be
/// deleted at any time, usually on reboot or when free space is low.
Perm(PathBuf),
}
impl SaveDir {
/// Get the path of this directory, either temporary or permanent.
pub fn as_path(&self) -> &Path {
use self::SaveDir::*;
match *self {
Temp(ref tempdir) => tempdir.path(),
Perm(ref pathbuf) => &*pathbuf,
}
}
/// Returns `true` if this is a temporary directory which will be deleted on-drop.
pub fn is_temporary(&self) -> bool {
use self::SaveDir::*;
match *self {
Temp(_) => true,
Perm(_) => false,
}
}
/// Unwrap the `PathBuf` from `self`; if this is a temporary directory,
/// it will be converted to a permanent one.
pub fn into_path(self) -> PathBuf {
use self::SaveDir::*;
match self {
Temp(tempdir) => tempdir.into_path(),
Perm(pathbuf) => pathbuf,
}
}
/// If this `SaveDir` is temporary, convert it to permanent.
/// This is a no-op if it already is permanent.
///
/// ### Warning: Potential Data Loss
/// Even though this will prevent deletion on-drop, the temporary folder on most OSes
/// (where this directory is created by default) can be automatically cleared by the OS at any
/// time, usually on reboot or when free space is low.
///
/// It is recommended that you relocate the files from a request which you want to keep to a
/// permanent folder on the filesystem.
pub fn keep(&mut self) {
use self::SaveDir::*;
*self = match mem::replace(self, Perm(PathBuf::new())) {
Temp(tempdir) => Perm(tempdir.into_path()),
old_self => old_self,
};
}
/// Delete this directory and its contents, regardless of its permanence.
///
/// ### Warning: Potential Data Loss
/// This is very likely irreversible, depending on the OS implementation.
///
/// Files deleted programmatically are deleted directly from disk, as compared to most file
/// manager applications which use a staging area from which deleted files can be safely
/// recovered (i.e. Windows' Recycle Bin, OS X's Trash Can, etc.).
pub fn delete(self) -> io::Result<()> {
use self::SaveDir::*;
match self {
Temp(tempdir) => tempdir.close(),
Perm(pathbuf) => fs::remove_dir_all(&pathbuf),
}
}
}
impl AsRef<Path> for SaveDir {
fn as_ref(&self) -> &Path {
self.as_path()
}
}
/// The reason the save operation quit partway through.
#[derive(Debug)]
pub enum PartialReason {
/// The count limit for files in the request was hit.
///
/// The associated file has not been saved to the filesystem.
CountLimit,
/// The size limit for an individual file was hit.
///
/// The file was partially written to the filesystem.
SizeLimit,
/// An error occurred during the operation.
IoError(io::Error),
}
impl From<io::Error> for PartialReason {
fn from(e: io::Error) -> Self {
PartialReason::IoError(e)
}
}
impl PartialReason {
/// Return `io::Error` in the `IoError` case or panic otherwise.
pub fn unwrap_err(self) -> io::Error {
self.expect_err("`PartialReason` was not `IoError`")
}
/// Return `io::Error` in the `IoError` case or panic with the given
/// message otherwise.
pub fn expect_err(self, msg: &str) -> io::Error {
match self {
PartialReason::IoError(e) => e,
_ => panic!("{}: {:?}", msg, self),
}
}
}
/// The file field that was being read when the save operation quit.
///
/// May be partially saved to the filesystem if `dest` is `Some`.
#[derive(Debug)]
pub struct PartialFileField<M> {
/// The field name for the partial file.
pub field_name: String,
/// The partial file's source in the multipart stream (may be partially read if `dest`
/// is `Some`).
pub source: MultipartFile<M>,
/// The partial file's entry on the filesystem, if the operation got that far.
pub dest: Option<SavedFile>,
}
/// The partial result type for `Multipart::save*()`.
///
/// Contains the successfully saved entries as well as the partially
/// saved file that was in the process of being read when the error occurred,
/// if applicable.
#[derive(Debug)]
pub struct PartialEntries<M> {
/// The entries that were saved successfully.
pub entries: Entries,
/// The file that was in the process of being read. `None` if the error
/// occurred between file entries.
pub partial_file: Option<PartialFileField<M>>,
}
/// Discards `partial_file`
impl<M> Into<Entries> for PartialEntries<M> {
fn into(self) -> Entries {
self.entries
}
}
impl<M> PartialEntries<M> {
/// If `partial_file` is present and contains a `SavedFile` then just
/// add it to the `Entries` instance and return it.
///
/// Otherwise, returns `self.entries`
pub fn keep_partial(mut self) -> Entries {
if let Some(partial_file) = self.partial_file {
if let Some(saved_file) = partial_file.dest {
self.entries.mut_files_for(partial_file.field_name).push(saved_file);
}
}
self.entries
}
}
/// The ternary result type used for the `SaveBuilder<_>` API.
#[derive(Debug)]
pub enum SaveResult<Success, Partial> {
/// The operation was a total success. Contained is the complete result.
Full(Success),
/// The operation quit partway through. Included is the partial
/// result along with the reason.
Partial(Partial, PartialReason),
/// An error occurred at the start of the operation, before anything was done.
Error(io::Error),
}
/// Shorthand result for methods that return `Entries`
pub type EntriesSaveResult<M> = SaveResult<Entries, PartialEntries<M>>;
/// Shorthand result for methods that return `SavedFile`s.
///
/// The `MultipartFile` is not provided here because it is not necessary to return
/// a borrow when the owned version is probably in the same scope. This hopefully
/// saves some headache with the borrow-checker.
pub type FileSaveResult = SaveResult<SavedFile, SavedFile>;
impl<M> EntriesSaveResult<M> {
/// Take the `Entries` from `self`, if applicable, and discarding
/// the error, if any.
pub fn into_entries(self) -> Option<Entries> {
match self {
Full(entries) | Partial(PartialEntries { entries, .. }, _) => Some(entries),
Error(_) => None,
}
}
}
impl<S, P> SaveResult<S, P> where P: Into<S> {
/// Convert `self` to `Option<S>`; there may still have been an error.
pub fn okish(self) -> Option<S> {
self.into_opt_both().0
}
/// Map the `Full` or `Partial` values to a new type, retaining the reason
/// in the `Partial` case.
pub fn map<T, Map>(self, map: Map) -> SaveResult<T, T> where Map: FnOnce(S) -> T {
match self {
Full(full) => Full(map(full)),
Partial(partial, reason) => Partial(map(partial.into()), reason),
Error(e) => Error(e),
}
}
/// Decompose `self` to `(Option<S>, Option<io::Error>)`
pub fn into_opt_both(self) -> (Option<S>, Option<io::Error>) {
match self {
Full(full) => (Some(full), None),
Partial(partial, PartialReason::IoError(e)) => (Some(partial.into()), Some(e)),
Partial(partial, _) => (Some(partial.into()), None),
Error(error) => (None, Some(error)),
}
}
/// Map `self` to an `io::Result`, discarding the error in the `Partial` case.
pub fn into_result(self) -> io::Result<S> {
match self {
Full(entries) => Ok(entries),
Partial(partial, _) => Ok(partial.into()),
Error(error) => Err(error),
}
}
/// Pessimistic version of `into_result()` which will return an error even
/// for the `Partial` case.
///
/// ### Note: Possible Storage Leak
/// It's generally not a good idea to ignore the `Partial` case, as there may still be a
/// partially written file on-disk. If you're not using a temporary directory
/// (OS-managed or via `TempDir`) then partially written files will remain on-disk until
/// explicitly removed which could result in excessive disk usage if not monitored closely.
pub fn into_result_strict(self) -> io::Result<S> {
match self {
Full(entries) => Ok(entries),
Partial(_, PartialReason::IoError(e)) | Error(e) => Err(e),
Partial(partial, _) => Ok(partial.into()),
}
}
}
fn create_dir_all(path: &Path) -> io::Result<()> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)
} else {
// RFC: return an error instead?
warn!("Attempting to save file in what looks like a root directory. File path: {:?}", path);
Ok(())
}
}
fn try_copy_buf<R: BufRead, W: Write>(mut src: R, mut dest: W) -> SaveResult<u64, u64> {
let mut total_copied = 0u64;
macro_rules! try_here (
($try:expr) => (
match $try {
Ok(val) => val,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return if total_copied == 0 { Error(e) }
else { Partial(total_copied, e.into()) },
}
)
);
loop {
let res = {
let buf = try_here!(src.fill_buf());
if buf.is_empty() { break; }
try_write_all(buf, &mut dest)
};
match res {
Full(copied) => { src.consume(copied); total_copied += copied as u64; }
Partial(copied, reason) => {
src.consume(copied); total_copied += copied as u64;
return Partial(total_copied, reason);
},
Error(err) => {
return Partial(total_copied, err.into());
}
}
}
Full(total_copied)
}
fn try_write_all<W>(mut buf: &[u8], mut dest: W) -> SaveResult<usize, usize> where W: Write {
let mut total_copied = 0;
macro_rules! try_here (
($try:expr) => (
match $try {
Ok(val) => val,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return if total_copied == 0 { Error(e) }
else { Partial(total_copied, e.into()) },
}
)
);
while !buf.is_empty() {
match try_here!(dest.write(buf)) {
0 => try_here!(Err(io::Error::new(io::ErrorKind::WriteZero,
"failed to write whole buffer"))),
copied => {
buf = &buf[copied..];
total_copied += copied;
},
}
}
Full(total_copied)
}