blob: 1b5339f2337419177b2f481ea6e6de690bee0ce3 [file] [log] [blame]
/*
* Copyright (C) the libgit2 contributors. All rights reserved.
*
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
*/
#include <zlib.h>
#include "git2/indexer.h"
#include "git2/object.h"
#include "common.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "oid.h"
#include "oidmap.h"
#define UINT31_MAX (0x7FFFFFFF)
struct entry {
git_oid oid;
uint32_t crc;
uint32_t offset;
uint64_t offset_long;
};
struct git_indexer_stream {
unsigned int parsed_header :1,
opened_pack :1,
have_stream :1,
have_delta :1;
struct git_pack_file *pack;
git_filebuf pack_file;
git_off_t off;
git_off_t entry_start;
git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
git_vector deltas;
unsigned int fanout[256];
git_hash_ctx hash_ctx;
git_oid hash;
git_transfer_progress_callback progress_cb;
void *progress_payload;
char objbuf[8*1024];
};
struct delta_info {
git_off_t delta_off;
};
const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
{
return &idx->hash;
}
static int open_pack(struct git_pack_file **out, const char *filename)
{
struct git_pack_file *pack;
if (git_packfile_alloc(&pack, filename) < 0)
return -1;
if ((pack->mwf.fd = p_open(pack->pack_name, O_RDONLY)) < 0) {
giterr_set(GITERR_OS, "Failed to open packfile.");
git_packfile_free(pack);
return -1;
}
*out = pack;
return 0;
}
static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
{
int error;
/* Verify we recognize this pack file format. */
if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) {
giterr_set(GITERR_OS, "Failed to read in pack header");
return error;
}
if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
giterr_set(GITERR_INDEXER, "Wrong pack signature");
return -1;
}
if (!pack_version_ok(hdr->hdr_version)) {
giterr_set(GITERR_INDEXER, "Wrong pack version");
return -1;
}
return 0;
}
static int objects_cmp(const void *a, const void *b)
{
const struct entry *entrya = a;
const struct entry *entryb = b;
return git_oid__cmp(&entrya->oid, &entryb->oid);
}
int git_indexer_stream_new(
git_indexer_stream **out,
const char *prefix,
git_transfer_progress_callback progress_cb,
void *progress_payload)
{
git_indexer_stream *idx;
git_buf path = GIT_BUF_INIT;
static const char suff[] = "/pack";
int error;
idx = git__calloc(1, sizeof(git_indexer_stream));
GITERR_CHECK_ALLOC(idx);
idx->progress_cb = progress_cb;
idx->progress_payload = progress_payload;
error = git_buf_joinpath(&path, prefix, suff);
if (error < 0)
goto cleanup;
error = git_filebuf_open(&idx->pack_file, path.ptr,
GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER);
git_buf_free(&path);
if (error < 0)
goto cleanup;
*out = idx;
return 0;
cleanup:
git_buf_free(&path);
git_filebuf_cleanup(&idx->pack_file);
git__free(idx);
return -1;
}
/* Try to store the delta so we can try to resolve it later */
static int store_delta(git_indexer_stream *idx)
{
struct delta_info *delta;
delta = git__calloc(1, sizeof(struct delta_info));
GITERR_CHECK_ALLOC(delta);
delta->delta_off = idx->entry_start;
if (git_vector_insert(&idx->deltas, delta) < 0)
return -1;
return 0;
}
static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
{
char buffer[64];
size_t hdrlen;
hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
git_hash_update(ctx, buffer, hdrlen);
}
static int hash_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
{
ssize_t read;
assert(idx && stream);
do {
if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
break;
git_hash_update(&idx->hash_ctx, idx->objbuf, read);
} while (read > 0);
if (read < 0)
return (int)read;
return 0;
}
/* In order to create the packfile stream, we need to skip over the delta base description */
static int advance_delta_offset(git_indexer_stream *idx, git_otype type)
{
git_mwindow *w = NULL;
assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
if (type == GIT_OBJ_REF_DELTA) {
idx->off += GIT_OID_RAWSZ;
} else {
git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
git_mwindow_close(&w);
if (base_off < 0)
return (int)base_off;
}
return 0;
}
/* Read from the stream and discard any output */
static int read_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
{
ssize_t read;
assert(stream);
do {
read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
} while (read > 0);
if (read < 0)
return (int)read;
return 0;
}
static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
{
void *ptr;
uint32_t crc;
unsigned int left, len;
git_mwindow *w = NULL;
crc = crc32(0L, Z_NULL, 0);
while (size) {
ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
if (ptr == NULL)
return -1;
len = min(left, (unsigned int)size);
crc = crc32(crc, ptr, len);
size -= len;
start += len;
git_mwindow_close(&w);
}
*crc_out = htonl(crc);
return 0;
}
static int store_object(git_indexer_stream *idx)
{
int i, error;
khiter_t k;
git_oid oid;
struct entry *entry;
git_off_t entry_size;
struct git_pack_entry *pentry;
git_hash_ctx *ctx = &idx->hash_ctx;
git_off_t entry_start = idx->entry_start;
entry = git__calloc(1, sizeof(*entry));
GITERR_CHECK_ALLOC(entry);
pentry = git__calloc(1, sizeof(struct git_pack_entry));
GITERR_CHECK_ALLOC(pentry);
git_hash_final(&oid, ctx);
entry_size = idx->off - entry_start;
if (entry_start > UINT31_MAX) {
entry->offset = UINT32_MAX;
entry->offset_long = entry_start;
} else {
entry->offset = (uint32_t)entry_start;
}
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error) {
git__free(pentry);
goto on_error;
}
kh_value(idx->pack->idx_cache, k) = pentry;
git_oid_cpy(&entry->oid, &oid);
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
goto on_error;
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
goto on_error;
for (i = oid.id[0]; i < 256; ++i) {
idx->fanout[i]++;
}
return 0;
on_error:
git__free(entry);
return -1;
}
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
int i, error;
khiter_t k;
git_oid oid;
size_t entry_size;
struct entry *entry;
struct git_pack_entry *pentry;
entry = git__calloc(1, sizeof(*entry));
GITERR_CHECK_ALLOC(entry);
if (entry_start > UINT31_MAX) {
entry->offset = UINT32_MAX;
entry->offset_long = entry_start;
} else {
entry->offset = (uint32_t)entry_start;
}
/* FIXME: Parse the object instead of hashing it */
if (git_odb__hashobj(&oid, obj) < 0) {
giterr_set(GITERR_INDEXER, "Failed to hash object");
return -1;
}
pentry = git__calloc(1, sizeof(struct git_pack_entry));
GITERR_CHECK_ALLOC(pentry);
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
if (!error) {
git__free(pentry);
goto on_error;
}
kh_value(idx->pack->idx_cache, k) = pentry;
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
entry_size = (size_t)(idx->off - entry_start);
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
goto on_error;
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
goto on_error;
for (i = oid.id[0]; i < 256; ++i) {
idx->fanout[i]++;
}
return 0;
on_error:
git__free(entry);
git__free(obj->data);
return -1;
}
static int do_progress_callback(git_indexer_stream *idx, git_transfer_progress *stats)
{
if (!idx->progress_cb) return 0;
return idx->progress_cb(stats, idx->progress_payload);
}
int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
{
int error = -1;
struct git_pack_header hdr;
size_t processed;
git_mwindow_file *mwf = &idx->pack->mwf;
assert(idx && data && stats);
processed = stats->indexed_objects;
if (git_filebuf_write(&idx->pack_file, data, size) < 0)
return -1;
/* Make sure we set the new size of the pack */
if (idx->opened_pack) {
idx->pack->mwf.size += size;
} else {
if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0)
return -1;
idx->opened_pack = 1;
mwf = &idx->pack->mwf;
if (git_mwindow_file_register(&idx->pack->mwf) < 0)
return -1;
}
if (!idx->parsed_header) {
unsigned int total_objects;
if ((unsigned)idx->pack->mwf.size < sizeof(hdr))
return 0;
if (parse_header(&hdr, idx->pack) < 0)
return -1;
idx->parsed_header = 1;
idx->nr_objects = ntohl(hdr.hdr_entries);
idx->off = sizeof(struct git_pack_header);
/* for now, limit to 2^32 objects */
assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
total_objects = (unsigned int)idx->nr_objects;
else
total_objects = UINT_MAX;
idx->pack->idx_cache = git_oidmap_alloc();
GITERR_CHECK_ALLOC(idx->pack->idx_cache);
idx->pack->has_cache = 1;
if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
return -1;
if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
return -1;
stats->received_objects = 0;
processed = stats->indexed_objects = 0;
stats->total_objects = total_objects;
do_progress_callback(idx, stats);
}
/* Now that we have data in the pack, let's try to parse it */
/* As the file grows any windows we try to use will be out of date */
git_mwindow_free_all(mwf);
while (processed < idx->nr_objects) {
git_packfile_stream *stream = &idx->stream;
git_off_t entry_start = idx->off;
size_t entry_size;
git_otype type;
git_mwindow *w = NULL;
if (idx->pack->mwf.size <= idx->off + 20)
return 0;
if (!idx->have_stream) {
error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
if (error == GIT_EBUFS) {
idx->off = entry_start;
return 0;
}
if (error < 0)
return -1;
git_mwindow_close(&w);
idx->entry_start = entry_start;
git_hash_ctx_init(&idx->hash_ctx);
if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
error = advance_delta_offset(idx, type);
if (error == GIT_EBUFS) {
idx->off = entry_start;
return 0;
}
if (error < 0)
return -1;
idx->have_delta = 1;
} else {
idx->have_delta = 0;
hash_header(&idx->hash_ctx, entry_size, type);
}
idx->have_stream = 1;
if (git_packfile_stream_open(stream, idx->pack, idx->off) < 0)
goto on_error;
}
if (idx->have_delta) {
error = read_object_stream(idx, stream);
} else {
error = hash_object_stream(idx, stream);
}
idx->off = stream->curpos;
if (error == GIT_EBUFS)
return 0;
/* We want to free the stream reasorces no matter what here */
idx->have_stream = 0;
git_packfile_stream_free(stream);
if (error < 0)
goto on_error;
if (idx->have_delta) {
error = store_delta(idx);
} else {
error = store_object(idx);
}
if (error < 0)
goto on_error;
if (!idx->have_delta) {
stats->indexed_objects = (unsigned int)++processed;
}
stats->received_objects++;
if (do_progress_callback(idx, stats) != 0) {
error = GIT_EUSER;
goto on_error;
}
}
return 0;
on_error:
git_mwindow_free_all(mwf);
return error;
}
static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix)
{
const char prefix[] = "pack-";
size_t slash = (size_t)path->size;
/* search backwards for '/' */
while (slash > 0 && path->ptr[slash - 1] != '/')
slash--;
if (git_buf_grow(path, slash + 1 + strlen(prefix) +
GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
return -1;
git_buf_truncate(path, slash);
git_buf_puts(path, prefix);
git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
path->size += GIT_OID_HEXSZ;
git_buf_puts(path, suffix);
return git_buf_oom(path) ? -1 : 0;
}
static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
{
unsigned int i;
struct delta_info *delta;
git_vector_foreach(&idx->deltas, i, delta) {
git_rawobj obj;
idx->off = delta->delta_off;
if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
return -1;
if (hash_and_save(idx, &obj, delta->delta_off) < 0)
return -1;
git__free(obj.data);
stats->indexed_objects++;
do_progress_callback(idx, stats);
}
return 0;
}
int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *stats)
{
git_mwindow *w = NULL;
unsigned int i, long_offsets = 0, left;
struct git_pack_idx_header hdr;
git_buf filename = GIT_BUF_INIT;
struct entry *entry;
void *packfile_hash;
git_oid file_hash;
git_hash_ctx ctx;
git_filebuf index_file = {0};
if (git_hash_ctx_init(&ctx) < 0)
return -1;
/* Test for this before resolve_deltas(), as it plays with idx->off */
if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) {
giterr_set(GITERR_INDEXER, "Indexing error: unexpected data at the end of the pack");
return -1;
}
if (idx->deltas.length > 0)
if (resolve_deltas(idx, stats) < 0)
return -1;
if (stats->indexed_objects != stats->total_objects) {
giterr_set(GITERR_INDEXER, "Indexing error: early EOF");
return -1;
}
git_vector_sort(&idx->objects);
git_buf_sets(&filename, idx->pack->pack_name);
git_buf_truncate(&filename, filename.size - strlen("pack"));
git_buf_puts(&filename, "idx");
if (git_buf_oom(&filename))
return -1;
if (git_filebuf_open(&index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0)
goto on_error;
/* Write out the header */
hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
hdr.idx_version = htonl(2);
git_filebuf_write(&index_file, &hdr, sizeof(hdr));
/* Write out the fanout table */
for (i = 0; i < 256; ++i) {
uint32_t n = htonl(idx->fanout[i]);
git_filebuf_write(&index_file, &n, sizeof(n));
}
/* Write out the object names (SHA-1 hashes) */
git_vector_foreach(&idx->objects, i, entry) {
git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
}
git_hash_final(&idx->hash, &ctx);
/* Write out the CRC32 values */
git_vector_foreach(&idx->objects, i, entry) {
git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
}
/* Write out the offsets */
git_vector_foreach(&idx->objects, i, entry) {
uint32_t n;
if (entry->offset == UINT32_MAX)
n = htonl(0x80000000 | long_offsets++);
else
n = htonl(entry->offset);
git_filebuf_write(&index_file, &n, sizeof(uint32_t));
}
/* Write out the long offsets */
git_vector_foreach(&idx->objects, i, entry) {
uint32_t split[2];
if (entry->offset != UINT32_MAX)
continue;
split[0] = htonl(entry->offset_long >> 32);
split[1] = htonl(entry->offset_long & 0xffffffff);
git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
}
/* Write out the packfile trailer */
packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
if (packfile_hash == NULL) {
git_mwindow_close(&w);
goto on_error;
}
memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
git_mwindow_close(&w);
git_filebuf_write(&index_file, &file_hash, sizeof(git_oid));
/* Write out the packfile trailer to the idx file as well */
if (git_filebuf_hash(&file_hash, &index_file) < 0)
goto on_error;
git_filebuf_write(&index_file, &file_hash, sizeof(git_oid));
/* Figure out what the final name should be */
if (index_path_stream(&filename, idx, ".idx") < 0)
goto on_error;
/* Commit file */
if (git_filebuf_commit_at(&index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
goto on_error;
git_mwindow_free_all(&idx->pack->mwf);
/* We need to close the descriptor here so Windows doesn't choke on commit_at */
p_close(idx->pack->mwf.fd);
idx->pack->mwf.fd = -1;
if (index_path_stream(&filename, idx, ".pack") < 0)
goto on_error;
/* And don't forget to rename the packfile to its new place. */
if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
return -1;
git_buf_free(&filename);
return 0;
on_error:
git_mwindow_free_all(&idx->pack->mwf);
git_filebuf_cleanup(&index_file);
git_buf_free(&filename);
git_hash_ctx_cleanup(&ctx);
return -1;
}
void git_indexer_stream_free(git_indexer_stream *idx)
{
khiter_t k;
unsigned int i;
struct entry *e;
struct delta_info *delta;
if (idx == NULL)
return;
git_vector_foreach(&idx->objects, i, e)
git__free(e);
git_vector_free(&idx->objects);
if (idx->pack) {
for (k = kh_begin(idx->pack->idx_cache); k != kh_end(idx->pack->idx_cache); k++) {
if (kh_exist(idx->pack->idx_cache, k))
git__free(kh_value(idx->pack->idx_cache, k));
}
git_oidmap_free(idx->pack->idx_cache);
}
git_vector_foreach(&idx->deltas, i, delta)
git__free(delta);
git_vector_free(&idx->deltas);
git_packfile_free(idx->pack);
git_filebuf_cleanup(&idx->pack_file);
git__free(idx);
}