/*
* Copyright 2018 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
extern crate smallvec;
use std::cmp::max;
use std::marker::PhantomData;
use std::ptr::write_bytes;
use std::slice::from_raw_parts;
use endian_scalar::{read_scalar, emplace_scalar};
use primitives::*;
use push::{Push, PushAlignment};
use table::Table;
use vtable::{VTable, field_index_to_field_offset};
use vtable_writer::VTableWriter;
use vector::{SafeSliceAccess, Vector};
pub const N_SMALLVEC_STRING_VECTOR_CAPACITY: usize = 16;
#[derive(Clone, Copy, Debug)]
struct FieldLoc {
off: UOffsetT,
id: VOffsetT,
}
/// FlatBufferBuilder builds a FlatBuffer by manipulating its internal
/// state. It has an owned `Vec<u8>` that grows as needed (up to the hardcoded
/// limit of 2GiB, which is set by the FlatBuffers format).
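///
/// For example, here is a hand-rolled sketch of building a tiny buffer
/// (slot offsets like `4` are normally constants emitted by `flatc`):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let start = builder.start_table();
/// // write field #0 (vtable slot 4) because it differs from its default:
/// builder.push_slot::<u32>(4, 42, 0);
/// let root = builder.end_table(start);
/// builder.finish_minimal(root);
/// assert!(!builder.finished_data().is_empty());
/// ```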
pub struct FlatBufferBuilder<'fbb> {
owned_buf: Vec<u8>,
head: usize,
field_locs: Vec<FieldLoc>,
written_vtable_revpos: Vec<UOffsetT>,
nested: bool,
finished: bool,
min_align: usize,
_phantom: PhantomData<&'fbb ()>,
}
impl<'fbb> FlatBufferBuilder<'fbb> {
/// Create a FlatBufferBuilder that is ready for writing.
pub fn new() -> Self {
Self::new_with_capacity(0)
}
/// Create a FlatBufferBuilder that is ready for writing, with a
/// ready-to-use capacity of the provided size.
///
/// The maximum valid value is `FLATBUFFERS_MAX_BUFFER_SIZE`.
pub fn new_with_capacity(size: usize) -> Self {
// we need to check the size here because we create the backing buffer
// directly, bypassing the typical way of using grow_owned_buf:
assert!(size <= FLATBUFFERS_MAX_BUFFER_SIZE,
"cannot initialize buffer bigger than 2 gigabytes");
FlatBufferBuilder {
owned_buf: vec![0u8; size],
head: size,
field_locs: Vec::new(),
written_vtable_revpos: Vec::new(),
nested: false,
finished: false,
min_align: 0,
_phantom: PhantomData,
}
}
/// Reset the FlatBufferBuilder internal state. Use this method after a
/// call to a `finish` function in order to re-use a FlatBufferBuilder.
///
/// This function is the only way to reset the `finished` state and start
/// again.
///
/// If you are using a FlatBufferBuilder repeatedly, make sure to use this
/// function, because it re-uses the FlatBufferBuilder's existing
/// heap-allocated `Vec<u8>` internal buffer. This offers significant speed
/// improvements as compared to creating a new FlatBufferBuilder for every
/// new object.
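///
/// For example, a sketch of building twice with one builder:
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new_with_capacity(1024);
/// for _ in 0..2 {
///     let start = builder.start_table();
///     let root = builder.end_table(start);
///     builder.finish_minimal(root);
///     // ... read or copy out finished_data() here ...
///     builder.reset(); // keeps the heap allocation for the next round
/// }
/// ```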
pub fn reset(&mut self) {
// memset only the part of the buffer that could be dirty:
{
let to_clear = self.owned_buf.len() - self.head;
let ptr = (&mut self.owned_buf[self.head..]).as_mut_ptr();
unsafe { write_bytes(ptr, 0, to_clear); }
}
self.head = self.owned_buf.len();
self.written_vtable_revpos.clear();
self.nested = false;
self.finished = false;
self.min_align = 0;
}
/// Destroy the FlatBufferBuilder, returning its internal byte vector
/// and the index into it that represents the start of valid data.
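///
/// For example (a sketch; the tail of the returned vector holds the data):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// builder.push(42u8);
/// let (buf, head) = builder.collapse();
/// assert_eq!(&buf[head..], &[42][..]);
/// ```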
pub fn collapse(self) -> (Vec<u8>, usize) {
(self.owned_buf, self.head)
}
/// Push a Push'able value onto the front of the in-progress data.
///
/// This function uses traits to provide a unified API for writing
/// scalars, tables, vectors, and WIPOffsets.
#[inline]
pub fn push<P: Push>(&mut self, x: P) -> WIPOffset<P::Output> {
let sz = P::size();
self.align(sz, P::alignment());
self.make_space(sz);
{
let (dst, rest) = (&mut self.owned_buf[self.head..]).split_at_mut(sz);
x.push(dst, rest);
}
WIPOffset::new(self.used_space() as UOffsetT)
}
/// Push a Push'able value onto the front of the in-progress data, and
/// store a reference to it in the in-progress vtable. If the value matches
/// the default, then this is a no-op.
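///
/// For example (a sketch with hand-written slot offsets):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let start = builder.start_table();
/// builder.push_slot::<u8>(4, 0, 0); // equals the default: nothing written
/// builder.push_slot::<u8>(6, 9, 0); // differs: written and tracked
/// let _root = builder.end_table(start);
/// ```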
#[inline]
pub fn push_slot<X: Push + PartialEq>(&mut self, slotoff: VOffsetT, x: X, default: X) {
self.assert_nested("push_slot");
if x == default {
return;
}
self.push_slot_always(slotoff, x);
}
/// Push a Push'able value onto the front of the in-progress data, and
/// store a reference to it in the in-progress vtable.
#[inline]
pub fn push_slot_always<X: Push>(&mut self, slotoff: VOffsetT, x: X) {
self.assert_nested("push_slot_always");
let off = self.push(x);
self.track_field(slotoff, off.value());
}
/// Retrieve the number of vtables that have been serialized into the
/// FlatBuffer. This is primarily used to check vtable deduplication.
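///
/// For example, two tables with identical layouts share one vtable (a
/// sketch; slots `4` and `6` stand in for generated constants):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// for _ in 0..2 {
///     let start = builder.start_table();
///     builder.push_slot_always(4, 1u32);
///     builder.push_slot_always(6, 2u32);
///     builder.end_table(start);
/// }
/// // the second table's vtable was a byte-for-byte duplicate:
/// assert_eq!(builder.num_written_vtables(), 1);
/// ```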
#[inline]
pub fn num_written_vtables(&self) -> usize {
self.written_vtable_revpos.len()
}
/// Start a Table write.
///
/// Asserts that the builder is not in a nested state.
///
/// Users probably want to use `push_slot` to add values after calling this.
#[inline]
pub fn start_table(&mut self) -> WIPOffset<TableUnfinishedWIPOffset> {
self.assert_not_nested("start_table can not be called when a table or vector is under construction");
self.nested = true;
WIPOffset::new(self.used_space() as UOffsetT)
}
/// End a Table write.
///
/// Asserts that the builder is in a nested state.
#[inline]
pub fn end_table(&mut self, off: WIPOffset<TableUnfinishedWIPOffset>) -> WIPOffset<TableFinishedWIPOffset> {
self.assert_nested("end_table");
let o = self.write_vtable(off);
self.nested = false;
self.field_locs.clear();
WIPOffset::new(o.value())
}
/// Start a Vector write.
///
/// Asserts that the builder is not in a nested state.
///
/// Most users will prefer to call `create_vector`.
/// Speed-optimizing users who choose to create vectors manually using this
/// function will want to use `push` to add values.
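///
/// For example (a sketch; elements are pushed in reverse because the
/// buffer grows downward, back-to-front):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// builder.start_vector::<u16>(3);
/// for &x in [10u16, 20, 30].iter().rev() {
///     builder.push(x);
/// }
/// let _vec = builder.end_vector::<u16>(3);
/// ```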
#[inline]
pub fn start_vector<T: Push>(&mut self, num_items: usize) {
self.assert_not_nested("start_vector can not be called when a table or vector is under construction");
self.nested = true;
self.align(num_items * T::size(), T::alignment().max_of(SIZE_UOFFSET));
}
/// End a Vector write.
///
/// Note that the `num_elems` parameter is the number of written items, not
/// the byte count.
///
/// Asserts that the builder is in a nested state.
#[inline]
pub fn end_vector<T: Push>(&mut self, num_elems: usize) -> WIPOffset<Vector<'fbb, T>> {
self.assert_nested("end_vector");
self.nested = false;
let o = self.push::<UOffsetT>(num_elems as UOffsetT);
WIPOffset::new(o.value())
}
/// Create a UTF-8 string.
///
/// The wire format represents this as a zero-terminated byte vector.
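///
/// For example, storing a string in a table slot (a sketch; the string
/// must be created before the table is started, and slot `4` stands in
/// for a generated constant):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let name = builder.create_string("hello");
/// let start = builder.start_table();
/// builder.push_slot_always(4, name);
/// let _root = builder.end_table(start);
/// ```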
#[inline]
pub fn create_string<'a: 'b, 'b>(&'a mut self, s: &'b str) -> WIPOffset<&'fbb str> {
self.assert_not_nested("create_string can not be called when a table or vector is under construction");
WIPOffset::new(self.create_byte_string(s.as_bytes()).value())
}
/// Create a zero-terminated byte vector.
#[inline]
pub fn create_byte_string(&mut self, data: &[u8]) -> WIPOffset<&'fbb [u8]> {
self.assert_not_nested("create_byte_string can not be called when a table or vector is under construction");
self.align(data.len() + 1, PushAlignment::new(SIZE_UOFFSET));
self.push(0u8);
self.push_bytes_unprefixed(data);
self.push(data.len() as UOffsetT);
WIPOffset::new(self.used_space() as UOffsetT)
}
/// Create a vector by memcpy'ing. This is much faster than calling
/// `create_vector`, but the underlying type must be represented as
/// little-endian on the host machine. This property is encoded in the
/// type system through the SafeSliceAccess trait. The following types are
/// always safe, on any platform: bool, u8, i8, and any
/// FlatBuffers-generated struct.
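///
/// For example (a sketch; `u8` is safe to memcpy on any platform):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let _bytes = builder.create_vector_direct(&[1u8, 2, 3]);
/// ```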
#[inline]
pub fn create_vector_direct<'a: 'b, 'b, T: SafeSliceAccess + Push + Sized + 'b>(&'a mut self, items: &'b [T]) -> WIPOffset<Vector<'fbb, T>> {
self.assert_not_nested("create_vector_direct can not be called when a table or vector is under construction");
let elem_size = T::size();
self.align(items.len() * elem_size, T::alignment().max_of(SIZE_UOFFSET));
let bytes = {
let ptr = items.as_ptr() as *const T as *const u8;
unsafe { from_raw_parts(ptr, items.len() * elem_size) }
};
self.push_bytes_unprefixed(bytes);
self.push(items.len() as UOffsetT);
WIPOffset::new(self.used_space() as UOffsetT)
}
/// Create a vector of strings.
///
/// Speed-sensitive users may wish to reduce memory usage by creating the
/// vector manually: use `start_vector`, `push`, and `end_vector`.
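///
/// For example (a sketch):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let _names = builder.create_vector_of_strings(&["foo", "bar"]);
/// ```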
#[inline]
pub fn create_vector_of_strings<'a, 'b>(&'a mut self, xs: &'b [&'b str]) -> WIPOffset<Vector<'fbb, ForwardsUOffset<&'fbb str>>> {
self.assert_not_nested("create_vector_of_strings can not be called when a table or vector is under construction");
// internally, smallvec can be a stack-allocated or heap-allocated vector:
// if xs.len() > N_SMALLVEC_STRING_VECTOR_CAPACITY then it will overflow to the heap.
let mut offsets: smallvec::SmallVec<[WIPOffset<&str>; N_SMALLVEC_STRING_VECTOR_CAPACITY]> = smallvec::SmallVec::with_capacity(xs.len());
unsafe { offsets.set_len(xs.len()); }
// note that this happens in reverse, because the buffer is built back-to-front:
for (i, &s) in xs.iter().enumerate().rev() {
let o = self.create_string(s);
offsets[i] = o;
}
self.create_vector(&offsets[..])
}
/// Create a vector of Push-able objects.
///
/// Speed-sensitive users may wish to reduce memory usage by creating the
/// vector manually: use `start_vector`, `push`, and `end_vector`.
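///
/// For example (a sketch; `WIPOffset`s returned by the `create_*` methods
/// can be collected into vectors the same way):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let _nums = builder.create_vector(&[1u32, 2, 3]);
/// ```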
#[inline]
pub fn create_vector<'a: 'b, 'b, T: Push + Copy + 'b>(&'a mut self, items: &'b [T]) -> WIPOffset<Vector<'fbb, T::Output>> {
let elem_size = T::size();
self.align(items.len() * elem_size, T::alignment().max_of(SIZE_UOFFSET));
for i in (0..items.len()).rev() {
self.push(items[i]);
}
WIPOffset::new(self.push::<UOffsetT>(items.len() as UOffsetT).value())
}
/// Get the byte slice for the data that has been written, regardless of
/// whether it has been finished.
#[inline]
pub fn unfinished_data(&self) -> &[u8] {
&self.owned_buf[self.head..]
}
/// Get the byte slice for the data that has been written after a call to
/// one of the `finish` functions.
#[inline]
pub fn finished_data(&self) -> &[u8] {
self.assert_finished("finished_bytes cannot be called when the buffer is not yet finished");
&self.owned_buf[self.head..]
}
/// Assert that a field is present in the just-finished Table.
///
/// This is somewhat low-level and is mostly used by the generated code.
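///
/// For example (a sketch; slot `4` stands in for a generated constant):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let start = builder.start_table();
/// builder.push_slot_always(4, 99u32);
/// let table = builder.end_table(start);
/// builder.required(table, 4, "my_field"); // would panic if slot 4 were empty
/// ```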
#[inline]
pub fn required(&self,
tab_revloc: WIPOffset<TableFinishedWIPOffset>,
slot_byte_loc: VOffsetT,
assert_msg_name: &'static str) {
let idx = self.used_space() - tab_revloc.value() as usize;
let tab = Table::new(&self.owned_buf[self.head..], idx);
let o = tab.vtable().get(slot_byte_loc) as usize;
assert!(o != 0, "missing required field {}", assert_msg_name);
}
/// Finalize the FlatBuffer by: aligning it, pushing an optional file
/// identifier onto it, pushing a size prefix onto it, and marking the
/// internal state of the FlatBufferBuilder as `finished`. Afterwards,
/// users can call `finished_data` to get the resulting data.
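///
/// For example (a sketch; the prefix is a little-endian `u32` byte count):
///
/// ```
/// use flatbuffers::FlatBufferBuilder;
///
/// let mut builder = FlatBufferBuilder::new();
/// let start = builder.start_table();
/// let root = builder.end_table(start);
/// builder.finish_size_prefixed(root, None);
/// let data = builder.finished_data();
/// let sz = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
/// assert_eq!(sz as usize, data.len() - 4);
/// ```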
#[inline]
pub fn finish_size_prefixed<T>(&mut self, root: WIPOffset<T>, file_identifier: Option<&str>) {
self.finish_with_opts(root, file_identifier, true);
}
/// Finalize the FlatBuffer by: aligning it, pushing an optional file
/// identifier onto it, and marking the internal state of the
/// FlatBufferBuilder as `finished`. Afterwards, users can call
/// `finished_data` to get the resulting data.
#[inline]
pub fn finish<T>(&mut self, root: WIPOffset<T>, file_identifier: Option<&str>) {
self.finish_with_opts(root, file_identifier, false);
}
/// Finalize the FlatBuffer by: aligning it and marking the internal state
/// of the FlatBufferBuilder as `finished`. Afterwards, users can call
/// `finished_data` to get the resulting data.
#[inline]
pub fn finish_minimal<T>(&mut self, root: WIPOffset<T>) {
self.finish_with_opts(root, None, false);
}
#[inline]
fn used_space(&self) -> usize {
self.owned_buf.len() - self.head
}
#[inline]
fn track_field(&mut self, slot_off: VOffsetT, off: UOffsetT) {
let fl = FieldLoc {
id: slot_off,
off: off,
};
self.field_locs.push(fl);
}
/// Write the VTable, if it is new.
fn write_vtable(&mut self, table_tail_revloc: WIPOffset<TableUnfinishedWIPOffset>) -> WIPOffset<VTableWIPOffset> {
self.assert_nested("write_vtable");
// Write the vtable offset, which is the start of any Table.
// We fill its value later.
let object_revloc_to_vtable: WIPOffset<VTableWIPOffset> =
WIPOffset::new(self.push::<UOffsetT>(0xF0F0F0F0 as UOffsetT).value());
// Layout of the data this function will create when a new vtable is
// needed.
// --------------------------------------------------------------------
// vtable starts here
// | x, x -- vtable len (bytes) [u16]
// | x, x -- object inline len (bytes) [u16]
// | x, x -- zero, or num bytes from start of object to field #0 [u16]
// | ...
// | x, x -- zero, or num bytes from start of object to field #n-1 [u16]
// vtable ends here
// table starts here
// | x, x, x, x -- offset (negative direction) to the vtable [i32]
// | aka "vtableoffset"
// | -- table inline data begins here, we don't touch it --
// table ends here -- aka "table_start"
// --------------------------------------------------------------------
//
// Layout of the data this function will create when we re-use an
// existing vtable.
//
// We always serialize this particular vtable, then compare it to the
// other vtables we know about to see if there is a duplicate. If there
// is, then we erase the serialized vtable we just made.
// We serialize it first so that we are able to do byte-by-byte
// comparisons with already-serialized vtables. This 1) saves
// bookkeeping space (we only keep revlocs to existing vtables), 2)
// allows us to convert to little-endian once, then do
// fast memcmp comparisons, and 3) by ensuring we are comparing real
// serialized vtables, we can be more assured that we are doing the
// comparisons correctly.
//
// --------------------------------------------------------------------
// table starts here
// | x, x, x, x -- offset (negative direction) to an existing vtable [i32]
// | aka "vtableoffset"
// | -- table inline data begins here, we don't touch it --
// table ends here -- aka "table_start"
// --------------------------------------------------------------------
// fill the WIP vtable with zeros:
let vtable_byte_len = get_vtable_byte_len(&self.field_locs);
self.make_space(vtable_byte_len);
// compute the length of the table (not vtable!) in bytes:
let table_object_size = object_revloc_to_vtable.value() - table_tail_revloc.value();
debug_assert!(table_object_size < 0x10000); // vtables use 16-bit offsets.
// Write the VTable (we may delete it afterwards, if it is a duplicate):
let vt_start_pos = self.head;
let vt_end_pos = self.head + vtable_byte_len;
{
// write the vtable header:
let vtfw = &mut VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]);
vtfw.write_vtable_byte_length(vtable_byte_len as VOffsetT);
vtfw.write_object_inline_size(table_object_size as VOffsetT);
// serialize every FieldLoc to the vtable:
for &fl in self.field_locs.iter() {
let pos: VOffsetT = (object_revloc_to_vtable.value() - fl.off) as VOffsetT;
debug_assert_eq!(vtfw.get_field_offset(fl.id),
0,
"tried to write a vtable field multiple times");
vtfw.write_field_offset(fl.id, pos);
}
}
let dup_vt_use = {
let this_vt = VTable::init(&self.owned_buf[..], self.head);
self.find_duplicate_stored_vtable_revloc(this_vt)
};
let vt_use = match dup_vt_use {
Some(n) => {
VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]).clear();
self.head += vtable_byte_len;
n
}
None => {
let new_vt_use = self.used_space() as UOffsetT;
self.written_vtable_revpos.push(new_vt_use);
new_vt_use
}
};
{
let n = self.head + self.used_space() - object_revloc_to_vtable.value() as usize;
let saw = read_scalar::<UOffsetT>(&self.owned_buf[n..n + SIZE_SOFFSET]);
debug_assert_eq!(saw, 0xF0F0F0F0);
emplace_scalar::<SOffsetT>(&mut self.owned_buf[n..n + SIZE_SOFFSET],
vt_use as SOffsetT - object_revloc_to_vtable.value() as SOffsetT);
}
self.field_locs.clear();
object_revloc_to_vtable
}
#[inline]
fn find_duplicate_stored_vtable_revloc(&self, needle: VTable) -> Option<UOffsetT> {
for &revloc in self.written_vtable_revpos.iter().rev() {
let o = VTable::init(&self.owned_buf[..], self.head + self.used_space() - revloc as usize);
if needle == o {
return Some(revloc);
}
}
None
}
// Only call this when you know it is safe to double the size of the buffer.
#[inline]
fn grow_owned_buf(&mut self) {
let old_len = self.owned_buf.len();
let new_len = max(1, old_len * 2);
let starting_active_size = self.used_space();
let diff = new_len - old_len;
self.owned_buf.resize(new_len, 0);
self.head += diff;
let ending_active_size = self.used_space();
debug_assert_eq!(starting_active_size, ending_active_size);
if new_len == 1 {
return;
}
// calculate the midpoint, and safely copy the old end data to the new
// end position:
let middle = new_len / 2;
{
let (left, right) = self.owned_buf[..].split_at_mut(middle);
right.copy_from_slice(left);
}
// finally, zero out the old end data.
{
let ptr = (&mut self.owned_buf[..middle]).as_mut_ptr();
unsafe { write_bytes(ptr, 0, middle); }
}
}
// Whether or not the buffer has a size prefix changes how we load the data,
// so the finish* functions are split along those lines.
fn finish_with_opts<T>(&mut self,
root: WIPOffset<T>,
file_identifier: Option<&str>,
size_prefixed: bool) {
self.assert_not_finished("buffer cannot be finished when it is already finished");
self.assert_not_nested("buffer cannot be finished when a table or vector is under construction");
self.written_vtable_revpos.clear();
let to_align = {
// for the root offset:
let a = SIZE_UOFFSET;
// for the size prefix:
let b = if size_prefixed { SIZE_UOFFSET } else { 0 };
// for the file identifier (a string that is not zero-terminated):
let c = if file_identifier.is_some() {
FILE_IDENTIFIER_LENGTH
} else {
0
};
a + b + c
};
{
let ma = PushAlignment::new(self.min_align);
self.align(to_align, ma);
}
if let Some(ident) = file_identifier {
debug_assert_eq!(ident.len(), FILE_IDENTIFIER_LENGTH);
self.push_bytes_unprefixed(ident.as_bytes());
}
self.push(root);
if size_prefixed {
let sz = self.used_space() as UOffsetT;
self.push::<UOffsetT>(sz);
}
self.finished = true;
}
#[inline]
fn align(&mut self, len: usize, alignment: PushAlignment) {
self.track_min_align(alignment.value());
let s = self.used_space();
self.make_space(padding_bytes(s + len, alignment.value()));
}
#[inline]
fn track_min_align(&mut self, alignment: usize) {
self.min_align = max(self.min_align, alignment);
}
#[inline]
fn push_bytes_unprefixed(&mut self, x: &[u8]) -> UOffsetT {
let n = self.make_space(x.len());
self.owned_buf[n..n + x.len()].copy_from_slice(x);
n as UOffsetT
}
#[inline]
fn make_space(&mut self, want: usize) -> usize {
self.ensure_capacity(want);
self.head -= want;
self.head
}
#[inline]
fn ensure_capacity(&mut self, want: usize) -> usize {
if self.unused_ready_space() >= want {
return want;
}
assert!(want <= FLATBUFFERS_MAX_BUFFER_SIZE,
"cannot grow buffer beyond 2 gigabytes");
while self.unused_ready_space() < want {
self.grow_owned_buf();
}
want
}
#[inline]
fn unused_ready_space(&self) -> usize {
self.head
}
#[inline]
fn assert_nested(&self, fn_name: &'static str) {
// we don't assert that self.field_locs.len() > 0 because the vtable
// could be empty (e.g. for empty tables, or for all-default values).
debug_assert!(self.nested, "incorrect FlatBufferBuilder usage: {} must be called while in a nested state", fn_name);
}
#[inline]
fn assert_not_nested(&self, msg: &'static str) {
debug_assert!(!self.nested, msg);
}
#[inline]
fn assert_finished(&self, msg: &'static str) {
debug_assert!(self.finished, msg);
}
#[inline]
fn assert_not_finished(&self, msg: &'static str) {
debug_assert!(!self.finished, msg);
}
}
/// Compute the length of the vtable needed to represent the provided FieldLocs.
/// If there are no FieldLocs, then provide the minimum number of bytes
/// required: enough to write the VTable header.
#[inline]
fn get_vtable_byte_len(field_locs: &[FieldLoc]) -> usize {
let max_voffset = field_locs.iter().map(|fl| fl.id).max();
match max_voffset {
None => { field_index_to_field_offset(0) as usize }
Some(mv) => { mv as usize + SIZE_VOFFSET }
}
}
#[inline]
fn padding_bytes(buf_size: usize, scalar_size: usize) -> usize {
// ((!buf_size) + 1) & (scalar_size - 1)
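// e.g. padding_bytes(5, 4) == 3 and padding_bytes(8, 4) == 0.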
(!buf_size).wrapping_add(1) & (scalar_size.wrapping_sub(1))
}