// Copyright 2019 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Commonly used functionality adapters.
//! At the moment, this crate contains the declaration of various errors
use {
rust_icu_sys as sys,
std::{ffi, os},
/// Represents a Unicode error, resulting from operations of low-level ICU libraries.
///
/// This is modeled after absl::Status in the Abseil library, which provides ways
/// for users to avoid dealing with all the numerous error codes directly.
#[derive(Error, Debug)]
pub enum Error {
/// The error originating in the underlying sys library.
///
/// At the moment it is possible to produce an Error which has a zero error code (i.e. no
/// error), because it makes it unnecessary for users to deal with error codes directly. It
/// does make for a slightly odd API, so we may turn it around a bit. Ideally, it should not
/// be possible to have an Error that isn't really an error.
#[error("ICU error code: {}", _0)]
// NOTE(review): the doc comment and `#[error]` attribute above describe the system-error
// variant (matched elsewhere in this file as `Error::Sys(code)`), but its declaration is not
// visible at this point — confirm it is present between the attribute and the next variant.
/// Errors originating from the wrapper code, for example when pre-converting input into
/// UTF-8 and the input happens to be malformed.
Wrapper(#[from] anyhow::Error),
impl Error {
/// The error code denoting that no error has happened.
pub const OK_CODE: sys::UErrorCode = sys::UErrorCode::U_ZERO_ERROR;
/// Returns true if `code` is exactly [`Error::OK_CODE`], i.e. it corresponds to no error.
///
/// Warning codes are not treated as OK by this check; use [`Error::ok_or_warning`] when
/// warnings should be tolerated as well.
pub fn is_ok(code: sys::UErrorCode) -> bool {
code == Self::OK_CODE
/// Creates a new error from the supplied status. Ok is returned if the status does not
/// correspond to an error code, i.e. it is either OK or a warning code.
pub fn ok_or_warning(status: sys::UErrorCode) -> Result<(), Self> {
// Warning codes compare below `OK_CODE`; only codes above it are real errors.
if Self::is_ok(status) || status < Self::OK_CODE {
} else {
/// Creates a new error from the supplied status. Ok is returned if the
/// error code does not constitute an error in preflight mode.
///
/// This error check explicitly ignores the buffer overflow error when reporting whether it
/// contains an error condition.
///
/// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
/// sizes required on the output in case of conversion calls such as `u_strFromUTF8`. The
/// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
/// `NULL` pointer), and then call the respective function. The function will compute the
/// buffer size, but will also return a bogus buffer overflow error.
pub fn ok_preflight(status: sys::UErrorCode) -> Result<(), Self> {
// Anything above `OK_CODE` is an error, except the buffer overflow that preflight
// calls are expected to report.
if status > Self::OK_CODE && status != sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR {
} else {
/// Returns true if this error has the supplied `code`.
///
/// Only the `Error::Sys` variant carries a code to compare against.
pub fn is_code(&self, code: sys::UErrorCode) -> bool {
if let Error::Sys(c) = self {
return *c == code;
/// Returns true if the error is an error, not a warning.
///
/// The ICU4C library has error codes for errors and warnings. A `Sys` error counts as an
/// error only if its code is above `U_ZERO_ERROR`; a `Wrapper` error always counts.
pub fn is_err(&self) -> bool {
match self {
Error::Sys(code) => *code > sys::UErrorCode::U_ZERO_ERROR,
Error::Wrapper(_) => true,
/// Returns true if there was an error in a preflight call.
///
/// This error check explicitly ignores the buffer overflow error when reporting whether it
/// contains an error condition.
///
/// Preflight calls to ICU libraries do a read-only scan of the input to determine the buffer
/// sizes required on the output in case of conversion calls such as `u_strFromUTF8`. The
/// way this call is made is to offer a zero-capacity buffer (which could be pointed to by a
/// `NULL` pointer), and then call the respective function. The function will compute the
/// buffer size, but will also return a bogus buffer overflow error.
pub fn is_preflight_err(&self) -> bool {
// We may expand the set of error codes that are exempt from error checks in preflight.
self.is_err() && !self.is_code(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
/// Returns true if the error is, in fact, a warning (nonfatal).
///
/// Only a `Sys` error whose code is below `U_ZERO_ERROR` is a warning; every other value is
/// not.
pub fn is_warn(&self) -> bool {
match self {
Error::Sys(c) => *c < sys::UErrorCode::U_ZERO_ERROR,
_ => false,
/// Builds an [`Error`] from any value that can be converted into an `anyhow::Error`.
///
/// NOTE(review): the body is not visible in this excerpt; presumably the result is an
/// `Error::Wrapper` — confirm.
pub fn wrapper(source: impl Into<anyhow::Error>) -> Self {
/// Lets a [`ffi::NulError`] (reported by `CString::new` when the input contains an interior
/// NUL byte) be propagated as an [`Error`] with `?`.
// NOTE(review): the conversion body is not visible in this excerpt.
impl From<ffi::NulError> for Error {
fn from(e: ffi::NulError) -> Self {
/// Lets a [`std::str::Utf8Error`] (invalid UTF-8 in a byte sequence) be propagated as an
/// [`Error`] with `?` or `.into()`.
// NOTE(review): the conversion body is not visible in this excerpt.
impl From<std::str::Utf8Error> for Error {
fn from(e: std::str::Utf8Error) -> Self {
/// Lets a [`std::string::FromUtf8Error`] (a failed `String::from_utf8`) be propagated as an
/// [`Error`] with `?` or `.into()`.
// NOTE(review): the conversion body is not visible in this excerpt.
impl From<std::string::FromUtf8Error> for Error {
fn from(e: std::string::FromUtf8Error) -> Self {
impl Into<std::fmt::Error> for Error {
fn into(self) -> std::fmt::Error {
// It is not possible to transfer any info into std::fmt::Error, so we log instead.
eprintln!("error while formatting: {:?}", &self);
std::fmt::Error {}
/// Generates a `Drop` implementation for a type that wraps an ICU object.
///
/// `type_name` is the type to implement drop for.
/// `impl_function_name` is the name of the function that implements
/// memory deallocation. It is assumed that the type has an internal
/// representation wrapped in a [std::ptr::NonNull].
///
/// Example:
///
/// ```rust ignore
/// pub struct UNumberFormatter {
/// rep: std::ptr::NonNull<Foo>,
/// }
/// //...
/// simple_drop_impl!(UNumberFormatter, unumf_close);
/// ```
macro_rules! simple_drop_impl {
($type_name:ty, $impl_function_name:ident) => {
impl Drop for $type_name {
/// Implements `$impl_function_name`.
fn drop(&mut self) {
// The deallocation function is a foreign function, hence the `unsafe` block.
unsafe {
/// Generates a method to wrap ICU4C `uloc` methods that require a resizable output string buffer.
///
/// The various `uloc` methods of this type have inconsistent signature patterns, with some putting
/// all their input arguments _before_ the `buffer` and its `capacity`, and some splitting the input
/// arguments.
///
/// Therefore, the macro supports input arguments in both positions.
///
/// The generated method first calls `method_to_call` with a zero-filled buffer of
/// `buffer_capacity` bytes. If ICU reports `U_BUFFER_OVERFLOW_ERROR`, or reports success together
/// with a length larger than the initial capacity, the buffer is resized to the reported length
/// and the call is repeated. The buffer is finally resized to the reported length and converted
/// into a `String`.
///
/// For an invocation of the form
/// ```ignore
/// buffered_string_method_with_retry!(
/// my_method,
/// buffer_capacity,
/// [before_arg_a: before_type_a, before_arg_b: before_type_b,],
/// [after_arg_a: after_type_a, after_arg_b: after_type_b,]
/// );
/// ```
/// the generated method has a signature of the form
/// ```ignore
/// fn my_method(
/// method_to_call: unsafe extern "C" fn(
/// before_type_a,
/// before_type_b,
/// *mut raw::c_char,
/// i32,
/// after_type_a,
/// after_type_b,
/// *mut sys::UErrorCode,
/// ) -> i32,
/// before_arg_a: before_type_a,
/// before_arg_b: before_type_b,
/// after_arg_a: after_type_a,
/// after_arg_b: after_type_b
/// ) -> Result<String, common::Error> {}
/// ```
macro_rules! buffered_string_method_with_retry {
($method_name:ident, $buffer_capacity:expr,
[$($before_arg:ident: $before_arg_type:ty,)*],
[$($after_arg:ident: $after_arg_type:ty,)*]) => {
fn $method_name(
method_to_call: unsafe extern "C" fn(
*mut raw::c_char,
*mut sys::UErrorCode,
) -> i32,
$($before_arg: $before_arg_type,)*
$($after_arg: $after_arg_type,)*
) -> Result<String, common::Error> {
let mut status = common::Error::OK_CODE;
// Start with a zero-filled buffer of the caller-chosen initial capacity.
let mut buf: Vec<u8> = vec![0; $buffer_capacity];
// Requires that any pointers that are passed in are valid.
let full_len: i32 = unsafe {
buf.as_mut_ptr() as *mut raw::c_char,
$buffer_capacity as i32,
&mut status,
// ICU methods are inconsistent in whether they silently truncate the output or treat
// the overflow as an error, so we need to check both cases.
if status == sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR ||
(common::Error::is_ok(status) &&
full_len > $buffer_capacity
.map_err(|e| common::Error::wrapper(e))?) {
// A retry is only attempted with a positive reported length.
assert!(full_len > 0);
let full_len: usize = full_len
.map_err(|e| common::Error::wrapper(e))?;
// Grow the buffer to the length reported by the first call.
buf.resize(full_len, 0);
// Same unsafe requirements as above, plus full_len must be exactly the output
// buffer size.
unsafe {
buf.as_mut_ptr() as *mut raw::c_char,
full_len as i32,
&mut status,
// Adjust the size of the buffer here, so that it holds exactly the reported number of
// bytes.
if (full_len >= 0) {
let full_len: usize = full_len
.map_err(|e| common::Error::wrapper(e))?;
buf.resize(full_len, 0);
// Invalid UTF-8 is reported through the underlying `Utf8Error`, converted into
// `common::Error`.
String::from_utf8(buf).map_err(|e| e.utf8_error().into())
/// Generates a pair of formatting methods for one numeric input type.
///
/// There is a slew of near-identical method calls which differ in the type of
/// the input argument and the name of the function to invoke.
///
/// The invocation:
///
/// ```rust ignore
/// impl ... {
/// // ...
/// format_ustring_for_type!(format_f64, unum_formatDouble, f64);
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// int32_t unum_formatDouble(
/// const UNumberFormat* fmt,
/// double number,
/// UChar* result,
/// int32_t result_length,
/// UFieldPosition* pos,
/// UErrorCode *status)
/// ```
///
/// as a `String`-returning method and a `ustring::UChar`-returning method whose name carries
/// the `_ustring` suffix:
///
/// ```rust ignore
/// impl ... {
/// format_f64(&self /* format */, number: f64) -> Result<String, common::Error>;
/// format_f64_ustring(&self /* format */, param: f64) -> Result<ustring::UChar, common::Error>;
/// }
/// ```
macro_rules! format_ustring_for_type{
($method_name:ident, $function_name:ident, $type_decl:ty) => (
/// Implements `$function_name`.
pub fn $method_name(&self, number: $type_decl) -> Result<String, common::Error> {
// Delegate to the `_ustring` variant generated below.
let result = paste::item! {
self. [< $method_name _ustring>] (number)?
// TODO: it should be possible to make this even shorter.
paste::item! {
/// Implements `$function_name`.
pub fn [<$method_name _ustring>] (&self, param: $type_decl) -> Result<ustring::UChar, common::Error> {
const CAPACITY: usize = 200;
[< $method_name _ustring_impl >],
[ rep: *const sys::UNumberFormat, param: $type_decl, ],
[ field: *mut sys::UFieldPosition, ]
[<$method_name _ustring_impl>](
// The field position is unused for now, so a null pointer is passed.
0 as *mut sys::UFieldPosition,
/// Expands into a getter method that forwards all its arguments and returns a fallible value which
/// is the same as the value returned by the underlying function.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
/// generalized_fallible_getter!(
/// get_context,
/// unum_getContext,
/// [context_type: sys::UDisplayContextType, ],
/// sys::UDisplayContext
/// );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// UDisplayContext unum_getContext(
/// const SOMETYPE* t,
/// UDisplayContextType type,
/// UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
/// fn get_context(&self, context_type: sys::UDisplayContextType) -> Result<sys::UDisplayContext, common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
macro_rules! generalized_fallible_getter{
($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident: $arg_type:ty ,)* ], $ret_type:ty) => (
/// Implements `$impl_name`.
pub fn $top_level_method_name(&self, $( $arg: $arg_type, )* ) -> Result<$ret_type, common::Error> {
// The status starts out as "no error" and is handed to the underlying function as an
// out-parameter.
let mut status = common::Error::OK_CODE;
let result: $ret_type = unsafe {
// The underlying function receives the raw `rep` pointer first, then the forwarded
// arguments, then the status out-parameter.
versioned_function!($impl_name)(self.rep.as_ptr(), $( $arg, )* &mut status)
/// Expands into a setter method that forwards all its arguments between []'s and returns a
/// `Result<(), common::Error>`.
///
/// The invocation:
///
/// ```rust ignore
/// impl _ {
/// generalized_fallible_setter!(
/// set_context,
/// unum_setContext,
/// [value: sys::UDisplayContext, ]
/// );
/// }
/// ```
///
/// allows us to bind the function:
///
/// ```c++ ignore
/// void unum_setContext(
/// SOMETYPE* t,
/// UDisplayContext value,
/// UErrorCode* status
/// );
/// ```
///
/// which then becomes:
///
/// ```rust ignore
/// impl _ {
/// fn set_context(&self, value: sys::UDisplayContext) -> Result<(), common::Error>;
/// }
/// ```
/// where `Self` has an internal representation named exactly `Self::rep`.
macro_rules! generalized_fallible_setter{
($top_level_method_name:ident, $impl_name:ident, [ $( $arg:ident : $arg_type:ty, )* ]) => (
// The argument list is forwarded unchanged.
[ $( $arg: $arg_type, )* ],
/// Used to simulate an array of C-style strings.
///
/// The owned strings and the raw pointers to them are stored side by side, so that the pointer
/// array can be handed to C code for as long as this value is alive.
pub struct CStringVec {
// The internal representation of the vector of C strings. Owns the string data.
rep: Vec<ffi::CString>,
// Same as rep, but converted into C pointers. Each pointer refers to the corresponding
// string owned by `rep`.
c_rep: Vec<*const os::raw::c_char>,
impl CStringVec {
/// Creates a new C string vector from the provided rust strings.
///
/// C strings are contiguous byte regions that end in `\0` and do not
/// contain `\0` anywhere else.
///
/// Use `as_c_array` to get an unowned raw pointer to the array, to pass
/// into FFI C code.
///
/// # Errors
///
/// Returns an error if any of `strings` cannot be converted by `CString::new`, i.e. if it
/// contains a `\0` byte.
pub fn new(strings: &[&str]) -> Result<Self, Error> {
let mut rep = Vec::with_capacity(strings.len());
// Convert all to asciiz strings and insert into the vector.
for elem in strings {
let asciiz = ffi::CString::new(*elem)?;
// Collect one raw pointer per owned string, in the same order as `rep`.
let c_rep = rep.iter().map(|e| e.as_ptr()).collect();
Ok(CStringVec { rep, c_rep })
/// Returns the underlying array of C strings as a C array pointer. The
/// array must not change after construction to ensure that this pointer
/// remains valid.
///
/// The returned pointer is unowned: it points into this value's pointer vector.
pub fn as_c_array(&self) -> *const *const os::raw::c_char {
self.c_rep.as_ptr() as *const *const os::raw::c_char
/// Returns the number of elements in the vector.
pub fn len(&self) -> usize {
/// Returns whether the vector is empty.
pub fn is_empty(&self) -> bool {
// Unit tests for the error helpers, `CStringVec` and `parse_ok`.
mod tests {
use super::*;
// Feeds a genuine error code (buffer overflow) through `Error::ok_or_warning`.
fn test_error_code() {
let error = Error::ok_or_warning(sys::UErrorCode::U_BUFFER_OVERFLOW_ERROR)
// Four ordinary strings convert successfully and are all accounted for.
fn test_into_char_array() {
let values = vec!["eenie", "meenie", "minie", "moe"];
let c_array = CStringVec::new(&values).expect("success");
assert_eq!(c_array.len(), 4);
// A string with an interior NUL byte must be rejected.
fn test_with_embedded_nul_byte() {
// `\0` is the NUL escape; the `x00o` that follows it is ordinary text, so this string
// contains exactly one interior NUL byte.
let values = vec!["hell\0x00o"];
let _c_array = CStringVec::new(&values).expect_err("should fail");
// Parse errors whose line and offset are both zero or negative are accepted by `parse_ok`.
fn test_parser_error_ok() {
let tests = vec![
sys::UParseError {
line: 0,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
sys::UParseError {
line: -1,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
sys::UParseError {
line: 0,
offset: -1,
preContext: [0; 16usize],
postContext: [0; 16usize],
for test in tests {
assert!(parse_ok(test).is_ok(), "for test: {:?}", test.clone());
// Parse errors with a positive line or a positive offset are rejected by `parse_ok`, even
// when the other field is negative.
fn test_parser_error_not_ok() {
let tests = vec![
sys::UParseError {
line: 1,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
sys::UParseError {
line: 0,
offset: 1,
preContext: [0; 16usize],
postContext: [0; 16usize],
sys::UParseError {
line: -1,
offset: 1,
preContext: [0; 16usize],
postContext: [0; 16usize],
for test in tests {
assert!(parse_ok(test).is_err(), "for test: {:?}", test.clone());
/// A zero-value parse error, used to initialize types that get passed into FFI code.
///
/// The line and offset are zero and both context buffers are zero-filled.
pub static NO_PARSE_ERROR: sys::UParseError = sys::UParseError {
line: 0,
offset: 0,
preContext: [0; 16usize],
postContext: [0; 16usize],
/// Converts a parse error to a Result.
/// A parse error is an error if line or offset are positive, apparently.
pub fn parse_ok(e: sys::UParseError) -> Result<(), crate::Error> {
if e.line > 0 || e.offset > 0 {
return Err(Error::Wrapper(anyhow!(
"parse error: line: {}, offset: {}",