blob: 2f5a6d1d06dbcedf2d63a86e05abff0e990f40fd [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Commonly used functionality adapters.
//!
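//! At the moment, this crate contains the declaration of various errors, macros that help wrap
//! ICU4C functions, and an adapter for passing arrays of C strings to FFI code.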
use {
anyhow::anyhow,
rust_icu_sys as sys,
std::{ffi, os},
thiserror::Error,
};
/// Represents a Unicode error, resulting from operations of low-level ICU libraries.
///
/// This is modeled after absl::Status in the Abseil library, which provides ways
/// for users to avoid dealing with all the numerous error codes directly.
///
/// The `Display` and `std::error::Error` implementations are generated by the `thiserror`
/// derive from the `#[error(...)]` attribute on each variant.
#[derive(Error, Debug)]
pub enum Error {
/// The error originating in the underlying sys library.
///
/// At the moment it is possible to produce an Error which has a zero error code (i.e. no
/// error), because it makes it unnecessary for users to deal with error codes directly. It
/// does make for a bit weird API, so we may turn it around a bit. Ideally, it should not be
/// possible to have an Error that isn't really an error.
///
/// Because of the above, use `is_err` / `is_warn` to find out whether the wrapped code is a
/// real failure, a warning, or neither.
///
/// The display message is `ICU error code: ` followed by the wrapped code.
#[error("ICU error code: {}", _0)]
Sys(sys::UErrorCode),
/// Errors originating from the wrapper code. For example when pre-converting input into
/// UTF8 for input that happens to be malformed.
///
/// `transparent` forwards the message to the wrapped `anyhow::Error`, and `#[from]` lets an
/// `anyhow::Error` be converted into this variant, e.g. by the `?` operator.
#[error(transparent)]
Wrapper(#[from] anyhow::Error),
}
impl Error {
/// The error code denoting no error has happened.
pub const OK_CODE: sys::UErrorCode = sys::UErrorCode::U_ZERO_ERROR;
/// Returns true if this error code corresponds to no error.
pub fn is_ok(code: sys::UErrorCode) -> bool {
code == Self::OK_CODE
}
/// Creates a new error from the supplied status. Ok is returned if the error code does not
/// correspond to an error code (as opposed to OK or a warning code).
pub fn ok_or_warning(status: sys::UErrorCode) -> Result<(), Self> {
if Self::is_ok(status) || status < Self::OK_CODE {
Ok(())
} else {
Err(Error::Sys(status))
}
}
/// Creates a new error from the supplied status. Ok is returned if the
/// error code does not constitute an error in preflight mode.
///
/// This error check explicitly ignores the buffer overflow error when reporting whether it
/// contains an error condition.
///
/// Preflight calls to ICU libraries do a dummy scan of the input to determine the buffer sizes
/// required on the output in case of conversion calls such as `ucal_strFromUTF8`. The way
/// this call is made is to offer a zero-capacity buffer (which could be pointed to by a `NULL`
/// pointer), and then call the respective function. The function will compute the buffer
/// size, but will also return a bogus buffer overflow error.
pub fn ok_preflight(status: sys::UErrorCode) -> Result<(), Self> {
if status > Self::OK_CODE && status != sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR {
Err(Error::Sys(status))
} else {
Ok(())
}
}
/// Returns true if this error has the supplied `code`.
pub fn is_code(&self, code: sys::UErrorCode) -> bool {
if let Error::Sys(c) = self {
return *c == code;
}
false
}
/// Returns true if the error is an error, not a warning.
///
/// The ICU4C library has error codes for errors and warnings.
pub fn is_err(&self) -> bool {
match self {
Error::Sys(code) => *code > sys::UErrorCode::U_ZERO_ERROR,
Error::Wrapper(_) => true,
}
}
/// Return true if there was an error in a preflight call.
///
/// This error check explicitly ignores the buffer overflow error when reporting whether it
/// contains an error condition.
///
/// Preflight calls to ICU libraries do a dummy scan of the input to determine the buffer sizes
/// required on the output in case of conversion calls such as `ucal_strFromUTF8`. The way
/// this call is made is to offer a zero-capacity buffer (which could be pointed to by a `NULL`
/// pointer), and then call the respective function. The function will compute the buffer
/// size, but will also return a bogus buffer overflow error.
pub fn is_preflight_err(&self) -> bool {
// We may expand the set of error codes that are exempt from error checks in preflight.
self.is_err() && !self.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
}
/// Returns true if the error is, in fact, a warning (nonfatal).
pub fn is_warn(&self) -> bool {
match self {
Error::Sys(c) => *c < sys::UErrorCode::U_ZERO_ERROR,
_ => false,
}
}
pub fn wrapper(source: impl Into<anyhow::Error>) -> Self {
Self::Wrapper(source.into())
}
}
impl From<ffi::NulError> for Error {
fn from(e: ffi::NulError) -> Self {
Self::wrapper(e)
}
}
impl From<std::str::Utf8Error> for Error {
fn from(e: std::str::Utf8Error) -> Self {
Self::wrapper(e)
}
}
impl From<std::string::FromUtf8Error> for Error {
fn from(e: std::string::FromUtf8Error) -> Self {
Self::wrapper(e)
}
}
impl Into<std::fmt::Error> for Error {
fn into(self) -> std::fmt::Error {
// It is not possible to transfer any info into std::fmt::Error, so we log instead.
eprintln!("error while formatting: {:?}", &self);
std::fmt::Error {}
}
}
/// Implements [Drop] for a wrapper type by handing its raw pointer to a deallocation function.
///
/// `type_name` is the type to implement drop for.
/// `impl_function_name` is the name of the function that implements
/// memory deallocation. It is assumed that the type has an internal
/// representation wrapped in a [std::ptr::NonNull].
///
/// The expansion reads the field `self.rep` and invokes `versioned_function!`, which is not
/// defined in this file, so that macro must be in scope at the place where `simple_drop_impl!`
/// is used.
///
/// Example:
///
/// ```rust ignore
/// pub struct UNumberFormatter {
/// rep: std::ptr::NonNull<Foo>,
/// }
/// //...
/// simple_drop_impl!(UNumberFormatter, unumf_close);
/// ```
#[macro_export]
macro_rules! simple_drop_impl {
($type_name:ty, $impl_function_name:ident) => {
impl Drop for $type_name {
/// Implements `$impl_function_name`.
fn drop(&mut self) {
// Passes the raw pointer stored in `self.rep` to the deallocation function.
// NOTE(review): presumably `rep` is the only owner of the pointee, so that it is
// released exactly once -- confirm for each type this macro is applied to.
unsafe {
versioned_function!($impl_function_name)(self.rep.as_ptr());
}
}
}
};
}
/// Generates a method to wrap ICU4C `uloc` methods that require a resizable output string buffer.
///
/// The various `uloc` methods of this type have inconsistent signature patterns, with some putting
/// all their input arguments _before_ the `buffer` and its `capacity`, and some splitting the input
/// arguments.
///
/// Therefore, the macro supports input arguments in both positions.
///
/// The generated method first calls the wrapped function with a buffer of `$buffer_capacity`
/// bytes. If the output did not fit (reported either through `U_BUFFER_OVERFLOW_ERROR` or only
/// through a returned length larger than the capacity), the buffer is grown to the reported
/// length and the function is called a second time. Since every argument is passed twice in
/// that case, the argument types need to be `Copy`.
///
/// The expansion refers to `raw`, `sys`, `common` and the `TryInto` trait by those names, so
/// all of them have to be in scope where the macro is invoked.
///
/// For an invocation of the form
///
/// ```ignore
/// buffered_string_method_with_retry!(
///     my_method,
///     BUFFER_CAPACITY,
///     [before_arg_a: before_type_a, before_arg_b: before_type_b,],
///     [after_arg_a: after_type_a, after_arg_b: after_type_b,]
/// );
/// ```
///
/// the generated method has a signature of the form
///
/// ```ignore
/// fn my_method(
///     method_to_call: unsafe extern "C" fn(
///         before_type_a,
///         before_type_b,
///         *mut raw::c_char,
///         i32,
///         after_type_a,
///         after_type_b,
///         *mut sys::UErrorCode,
///     ) -> i32,
///     before_arg_a: before_type_a,
///     before_arg_b: before_type_b,
///     after_arg_a: after_type_a,
///     after_arg_b: after_type_b
/// ) -> Result<String, common::Error> {}
/// ```
///
/// The generated method returns the produced bytes as a `String`, or a `common::Error` if the
/// wrapped function reports an error status, a length conversion fails, or the output is not
/// valid UTF-8.
#[macro_export]
macro_rules! buffered_string_method_with_retry {
    ($method_name:ident, $buffer_capacity:expr,
     [$($before_arg:ident: $before_arg_type:ty,)*],
     [$($after_arg:ident: $after_arg_type:ty,)*]) => {
        fn $method_name(
            method_to_call: unsafe extern "C" fn(
                $($before_arg_type,)*
                *mut raw::c_char,
                i32,
                $($after_arg_type,)*
                *mut sys::UErrorCode,
            ) -> i32,
            $($before_arg: $before_arg_type,)*
            $($after_arg: $after_arg_type,)*
        ) -> Result<String, common::Error> {
            let mut status = common::Error::OK_CODE;
            let mut buf: Vec<u8> = vec![0; $buffer_capacity];

            // Requires that any pointers that are passed in are valid.
            let full_len: i32 = unsafe {
                assert!(common::Error::is_ok(status));
                method_to_call(
                    $($before_arg,)*
                    buf.as_mut_ptr() as *mut raw::c_char,
                    $buffer_capacity as i32,
                    $($after_arg,)*
                    &mut status,
                )
            };

            // ICU methods are inconsistent in whether they silently truncate the output or treat
            // the overflow as an error, so we need to check both cases.
            if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR ||
               (common::Error::is_ok(status) &&
                    full_len > $buffer_capacity
                        .try_into()
                        .map_err(|e| common::Error::wrapper(e))?) {
                // The overflow reported by the first attempt is expected and is dealt with by
                // the retry below. The status must be cleared first: otherwise the assertion
                // guarding the second call fails, and a status that already holds a failure
                // would be carried into the retry.
                status = common::Error::OK_CODE;

                assert!(full_len > 0);
                let full_len: usize = full_len
                    .try_into()
                    .map_err(|e| common::Error::wrapper(e))?;
                buf.resize(full_len, 0);

                // Same unsafe requirements as above, plus full_len must be exactly the output
                // buffer size.
                unsafe {
                    assert!(common::Error::is_ok(status));
                    method_to_call(
                        $($before_arg,)*
                        buf.as_mut_ptr() as *mut raw::c_char,
                        full_len as i32,
                        $($after_arg,)*
                        &mut status,
                    )
                };
            }

            common::Error::ok_or_warning(status)?;

            // Adjust the size of the buffer here.
            if full_len >= 0 {
                let full_len: usize = full_len
                    .try_into()
                    .map_err(|e| common::Error::wrapper(e))?;
                buf.resize(full_len, 0);
            }
            String::from_utf8(buf).map_err(|e| e.utf8_error().into())
        }
    }
}
/// There is a slew of near-identical method calls which differ in the type of
/// the input argument and the name of the function to invoke.
///
/// The invocation:
///
/// ```rust ignore
/// impl ... {
///   // ...
///   format_ustring_for_type!(format_f64, unum_formatDouble, f64);
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// int32_t unum_formatDouble(
///     const UNumberFormat* fmt,
///     double number,
///     UChar* result,
///     int32_t result_length,
///     UFieldPosition* pos,
///     UErrorCode *status)
/// ```
///
/// as a pair of methods, the second one named by appending `_ustring` to the first:
///
/// ```rust ignore
/// impl ... {
///   pub fn format_f64(&self /* format */, number: f64) -> Result<String, common::Error>;
///   pub fn format_f64_ustring(&self /* format */, param: f64) -> Result<ustring::UChar, common::Error>;
/// }
/// ```
///
/// The `String`-returning method calls the `_ustring` one and converts its result with
/// `String::try_from`.
///
/// The expansion relies on `paste`, `buffered_uchar_method_with_retry!`, `versioned_function!`,
/// `common`, `sys` and `ustring` being available at the invocation site, and on `Self` having a
/// field named `rep`.
#[macro_export]
macro_rules! format_ustring_for_type {
    ($method_name:ident, $function_name:ident, $type_decl:ty) => (
        /// Implements `$function_name`.
        pub fn $method_name(&self, number: $type_decl) -> Result<String, common::Error> {
            let result = paste::item! {
                self. [< $method_name _ustring>] (number)?
            };
            String::try_from(&result)
        }

        // Should be able to use https://github.com/google/rust_icu/pull/144 to
        // make this even shorter.
        paste::item! {
            /// Implements `$function_name`.
            pub fn [<$method_name _ustring>] (&self, param: $type_decl) -> Result<ustring::UChar, common::Error> {
                const CAPACITY: usize = 200;
                buffered_uchar_method_with_retry!(
                    [< $method_name _ustring_impl >],
                    CAPACITY,
                    [ rep: *const sys::UNumberFormat, param: $type_decl, ],
                    [ field: *mut sys::UFieldPosition, ]
                );

                [<$method_name _ustring_impl>](
                    versioned_function!($function_name),
                    self.rep.as_ptr(),
                    param,
                    // The field position is unused for now, so a null pointer is passed.
                    std::ptr::null_mut::<sys::UFieldPosition>(),
                )
            }
        }
    )
}
/// Expands into a getter method that forwards all its arguments and returns a fallible value which
/// is the same as the value returned by the underlying function.
///
/// The macro arguments are, in order: the name of the generated method, the name of the
/// underlying function, the bracketed list of `name: type,` arguments to forward (each one
/// followed by a comma), and the return type.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
/// generalized_fallible_getter!(
/// get_context,
/// unum_getContext,
/// [context_type: sys::UDisplayContextType, ],
/// sys::UDisplayContext
/// );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// UDisplayContext unum_getContext(
/// const SOMETYPE* t,
/// UDisplayContextType type,
/// UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
/// fn get_context(&self, context_type: sys::UDisplayContextType) -> Result<sys::UDisplayContext, common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// The status written by the underlying function is checked with
/// `common::Error::ok_or_warning`, so a warning status still yields `Ok(result)`.
///
/// The expansion uses `versioned_function!` and `common`, which must be in scope at the
/// invocation site.
#[macro_export]
macro_rules! generalized_fallible_getter{
($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident: $arg_type:ty ,)* ], $ret_type:ty) => (
/// Implements `$impl_name`.
pub fn $top_level_method_name(&self, $( $arg: $arg_type, )* ) -> Result<$ret_type, common::Error> {
// The status starts out as "no error"; the underlying function overwrites it on failure.
let mut status = common::Error::OK_CODE;
let result: $ret_type = unsafe {
assert!(common::Error::is_ok(status));
// The wrapped pointer goes first, then the forwarded arguments, then the status.
versioned_function!($impl_name)(self.rep.as_ptr(), $( $arg, )* &mut status)
};
// Only genuine errors abort here; OK and warning statuses fall through.
common::Error::ok_or_warning(status)?;
Ok(result)
}
)
}
/// Expands into a setter method that forwards all its arguments between []'s and returns a
/// Result<(), common::Error>.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
/// generalized_fallible_setter!(
/// set_context,
/// unum_setContext,
/// [value: sys::UDisplayContext, ]
/// );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// void unum_setContext(
/// const SOMETYPE* t,
/// UDisplayContext value,
/// UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
/// fn set_context(&self, value: sys::UDisplayContext) -> Result<(), common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
///
/// This macro expands to an invocation of `generalized_fallible_getter!` with `()` as the
/// return type, so that macro must also be in scope at the invocation site.
#[macro_export]
macro_rules! generalized_fallible_setter{
($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident : $arg_type:ty, )* ]) => (
// A setter is a getter whose underlying function returns nothing.
generalized_fallible_getter!(
$top_level_method_name,
$impl_name,
[ $( $arg: $arg_type, )* ],
());
)
}
/// Used to simulate an array of C-style strings.
///
/// The value owns NUL-terminated copies of the strings it was built from, together with a
/// parallel array of pointers to them that can be handed to C code.
#[derive(Debug)]
pub struct CStringVec {
    // The internal representation of the vector of C strings. It is what keeps the pointers
    // in `c_rep` alive.
    rep: Vec<ffi::CString>,
    // Same as rep, but converted into C pointers.
    c_rep: Vec<*const os::raw::c_char>,
}

impl CStringVec {
    /// Creates a new C string vector from the provided rust strings.
    ///
    /// C strings are continuous byte regions that end in `\0` and do not
    /// contain `\0` anywhere else.
    ///
    /// Use `as_c_array` to get an unowned raw pointer to the array, to pass
    /// into FFI C code.
    ///
    /// # Errors
    ///
    /// Returns an error for the first element of `strings` that contains a `\0` byte, since
    /// such a string can not be represented as a C string.
    pub fn new(strings: &[&str]) -> Result<Self, Error> {
        // Convert all to asciiz strings; the conversion stops at the first failure.
        let rep = strings
            .iter()
            .map(|elem| ffi::CString::new(*elem))
            .collect::<Result<Vec<_>, _>>()?;
        // The pointers refer to the buffers owned by the `CString`s, which stay where they
        // are when `rep` itself is moved into the struct below.
        let c_rep = rep.iter().map(|e| e.as_ptr()).collect();
        Ok(CStringVec { rep, c_rep })
    }

    /// Returns the underlying array of C strings as a C array pointer. The
    /// array must not change after construction to ensure that this pointer
    /// remains valid.
    ///
    /// The array has [`CStringVec::len`] elements.
    pub fn as_c_array(&self) -> *const *const os::raw::c_char {
        self.c_rep.as_ptr()
    }

    /// Returns the number of elements in the vector.
    pub fn len(&self) -> usize {
        self.rep.len()
    }

    /// Returns true if the vector contains no elements.
    pub fn is_empty(&self) -> bool {
        self.rep.is_empty()
    }
}
// Unit tests for the error helpers, `CStringVec` and `parse_ok`.
#[cfg(test)]
mod tests {
// Brings in `Error`, `CStringVec`, `parse_ok` and the `sys` alias from the parent module.
use super::*;
// A buffer overflow status is an error, but it is not a *preflight* error.
#[test]
fn test_error_code() {
let error = Error::ok_or_warning(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
.err()
.unwrap();
assert!(error.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR));
assert!(!error.is_preflight_err());
assert!(!error.is_code(sys::UErrorCode::U_ZERO_ERROR));
}
// Strings without NUL bytes convert, and the element count is preserved.
#[test]
fn test_into_char_array() {
let values = vec!["eenie", "meenie", "minie", "moe"];
let c_array = CStringVec::new(&values).expect("success");
assert_eq!(c_array.len(), 4);
}
// A string with an interior NUL byte must be rejected.
#[test]
fn test_with_embedded_nul_byte() {
// NOTE: this literal is `hell`, then a NUL byte (`\0`), then the plain text `x00o`. The
// NUL byte is what makes the construction fail.
let values = vec!["hell\0x00o"];
let _c_array = CStringVec::new(&values).expect_err("should fail");
}
// Parse errors in which neither `line` nor `offset` is positive count as success.
#[test]
fn test_parser_error_ok() {
let tests = vec![
sys::UParseError {
line: 0,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
sys::UParseError {
line: -1,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
sys::UParseError {
line: 0,
offset: -1,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
];
for test in tests {
assert!(
parse_ok(test.clone()).is_ok(),
"for test: {:?}",
test.clone()
);
}
}
// A positive `line` or a positive `offset` is enough to make it an error.
#[test]
fn test_parser_error_not_ok() {
let tests = vec![
sys::UParseError {
line: 1,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
sys::UParseError {
line: 0,
offset: 1,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
sys::UParseError {
line: -1,
offset: 1,
preContext: [0; 16usize],
postContext: [0; 16usize],
},
];
for test in tests {
assert!(
parse_ok(test.clone()).is_err(),
"for test: {:?}",
test.clone()
);
}
}
}
/// An all-zero parse error.
///
/// Meant as the initial value for parse error structs that are handed to FFI code to be
/// filled in.
pub static NO_PARSE_ERROR: sys::UParseError = sys::UParseError {
    preContext: [0; 16],
    postContext: [0; 16],
    offset: 0,
    line: 0,
};
/// Converts a parse error to a Result.
///
/// A parse error is an error if line or offset are positive, apparently.
pub fn parse_ok(e: sys::UParseError) -> Result<(), crate::Error> {
if e.line > 0 || e.offset > 0 {
return Err(Error::Wrapper(anyhow!(
"parse error: line: {}, offset: {}",
e.line,
e.offset
)));
}
Ok(())
}