Merge pull request #4258 from pks-t/pks/sha1dc-update

SHA1DC update
diff --git a/src/index.c b/src/index.c
index 932a530..c29e90f 100644
--- a/src/index.c
+++ b/src/index.c
@@ -54,10 +54,6 @@
 				  unsigned int flags,
 				  git_index_matched_path_cb cb, void *payload);
 
-#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
-#define short_entry_size(len) entry_size(struct entry_short, len)
-#define long_entry_size(len) entry_size(struct entry_long, len)
-
 #define minimal_entry_size (offsetof(struct entry_short, path))
 
 static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
@@ -2282,12 +2278,29 @@
 	return 0;
 }
 
+static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags)
+{
+	if (varint_len) {
+		if (flags & GIT_IDXENTRY_EXTENDED)
+			return offsetof(struct entry_long, path) + path_len + 1 + varint_len;
+		else
+			return offsetof(struct entry_short, path) + path_len + 1 + varint_len;
+	} else {
+#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
+		if (flags & GIT_IDXENTRY_EXTENDED)
+			return entry_size(struct entry_long, path_len);
+		else
+			return entry_size(struct entry_short, path_len);
+#undef entry_size
+	}
+}
+
 static size_t read_entry(
 	git_index_entry **out,
 	git_index *index,
 	const void *buffer,
 	size_t buffer_size,
-	const char **last)
+	const char *last)
 {
 	size_t path_length, entry_size;
 	const char *path_ptr;
@@ -2344,35 +2357,34 @@
 			path_length = path_end - path_ptr;
 		}
 
-		if (entry.flags & GIT_IDXENTRY_EXTENDED)
-			entry_size = long_entry_size(path_length);
-		else
-			entry_size = short_entry_size(path_length);
-
-		if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
-			return 0;
-
+		entry_size = index_entry_size(path_length, 0, entry.flags);
 		entry.path = (char *)path_ptr;
 	} else {
 		size_t varint_len;
-		size_t shared = git_decode_varint((const unsigned char *)path_ptr, 
-						  &varint_len);
-		size_t len = strlen(path_ptr + varint_len);
-		size_t last_len = strlen(*last);
-		size_t tmp_path_len;
+		size_t strip_len = git_decode_varint((const unsigned char *)path_ptr,
+						     &varint_len);
+		size_t last_len = strlen(last);
+		size_t prefix_len = last_len - strip_len;
+		size_t suffix_len = strlen(path_ptr + varint_len);
+		size_t path_len;
 
 		if (varint_len == 0)
 			return index_error_invalid("incorrect prefix length");
 
-		GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
-		tmp_path = git__malloc(tmp_path_len);
+		GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len);
+		GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1);
+		tmp_path = git__malloc(path_len);
 		GITERR_CHECK_ALLOC(tmp_path);
-		memcpy(tmp_path, last, last_len);
-		memcpy(tmp_path + last_len, path_ptr + varint_len, len);
-		entry_size = long_entry_size(shared + len);
+
+		memcpy(tmp_path, last, prefix_len);
+		memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1);
+		entry_size = index_entry_size(suffix_len, varint_len, entry.flags);
 		entry.path = tmp_path;
 	}
 
+	if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
+		return 0;
+
 	if (index_entry_dup(out, index, &entry) < 0) {
 		git__free(tmp_path);
 		return 0;
@@ -2445,7 +2457,7 @@
 	unsigned int i;
 	struct index_header header = { 0 };
 	git_oid checksum_calculated, checksum_expected;
-	const char **last = NULL;
+	const char *last = NULL;
 	const char *empty = "";
 
 #define seek_forward(_increase) { \
@@ -2469,7 +2481,7 @@
 
 	index->version = header.version;
 	if (index->version >= INDEX_VERSION_NUMBER_COMP)
-		last = &empty;
+		last = empty;
 
 	seek_forward(INDEX_HEADER_SIZE);
 
@@ -2504,6 +2516,9 @@
 		}
 		error = 0;
 
+		if (index->version >= INDEX_VERSION_NUMBER_COMP)
+			last = entry->path;
+
 		seek_forward(entry_size);
 	}
 
@@ -2574,11 +2589,12 @@
 	return (extended > 0);
 }
 
-static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
+static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last)
 {
 	void *mem = NULL;
 	struct entry_short *ondisk;
 	size_t path_len, disk_size;
+	int varint_len = 0;
 	char *path;
 	const char *path_start = entry->path;
 	size_t same_len = 0;
@@ -2586,7 +2602,7 @@
 	path_len = ((struct entry_internal *)entry)->pathlen;
 
 	if (last) {
-		const char *last_c = *last;
+		const char *last_c = last;
 
 		while (*path_start == *last_c) {
 			if (!*path_start || !*last_c)
@@ -2596,13 +2612,10 @@
 			++same_len;
 		}
 		path_len -= same_len;
-		*last = entry->path;
+		varint_len = git_encode_varint(NULL, 0, same_len);
 	}
 
-	if (entry->flags & GIT_IDXENTRY_EXTENDED)
-		disk_size = long_entry_size(path_len);
-	else
-		disk_size = short_entry_size(path_len);
+	disk_size = index_entry_size(path_len, varint_len, entry->flags);
 
 	if (git_filebuf_reserve(file, &mem, disk_size) < 0)
 		return -1;
@@ -2642,16 +2655,34 @@
 		ondisk_ext->flags_extended = htons(entry->flags_extended &
 			GIT_IDXENTRY_EXTENDED_FLAGS);
 		path = ondisk_ext->path;
-	}
-	else
+		disk_size -= offsetof(struct entry_long, path);
+	} else {
 		path = ondisk->path;
+		disk_size -= offsetof(struct entry_short, path);
+	}
 
 	if (last) {
-		path += git_encode_varint((unsigned char *) path,
-					  disk_size,
-					  path_len - same_len);
+		varint_len = git_encode_varint((unsigned char *) path,
+					  disk_size, same_len);
+		assert(varint_len > 0);
+		path += varint_len;
+		disk_size -= varint_len;
+
+		/*
+		 * If using path compression, we are not allowed
+		 * to have additional trailing NULs.
+		 */
+		assert(disk_size == path_len + 1);
+	} else {
+		/*
+		 * If no path compression is used, we do have
+		 * NULs as padding. As such, simply assert that
+		 * we have enough space left to write the path.
+		 */
+		assert(disk_size > path_len);
 	}
-	memcpy(path, path_start, path_len);
+
+	memcpy(path, path_start, path_len + 1);
 
 	return 0;
 }
@@ -2662,8 +2693,7 @@
 	size_t i;
 	git_vector case_sorted, *entries;
 	git_index_entry *entry;
-	const char **last = NULL;
-	const char *empty = "";
+	const char *last = NULL;
 
 	/* If index->entries is sorted case-insensitively, then we need
 	 * to re-sort it case-sensitively before writing */
@@ -2676,11 +2706,14 @@
 	}
 
 	if (index->version >= INDEX_VERSION_NUMBER_COMP)
-		last = &empty;
+		last = "";
 
-	git_vector_foreach(entries, i, entry)
+	git_vector_foreach(entries, i, entry) {
 		if ((error = write_disk_entry(file, entry, last)) < 0)
 			break;
+		if (index->version >= INDEX_VERSION_NUMBER_COMP)
+			last = entry->path;
+	}
 
 	if (index->ignore_case)
 		git_vector_free(&case_sorted);
diff --git a/src/varint.c b/src/varint.c
index 2f86860..beac8c7 100644
--- a/src/varint.c
+++ b/src/varint.c
@@ -36,7 +36,7 @@
 	while (value >>= 7)
 		varint[--pos] = 128 | (--value & 127);
 	if (buf) {
-		if (bufsize < pos)
+		if (bufsize < (sizeof(varint) - pos))
 			return -1;
 		memcpy(buf, varint + pos, sizeof(varint) - pos);
 	}
diff --git a/tests/core/encoding.c b/tests/core/encoding.c
index 7d91720..a677afe 100644
--- a/tests/core/encoding.c
+++ b/tests/core/encoding.c
@@ -29,6 +29,9 @@
 	cl_assert(git_encode_varint(buf, 100, 65) == 1);
 	cl_assert(buf[0] == 'A');
 
+	cl_assert(git_encode_varint(buf, 1, 1) == 1);
+	cl_assert(!memcmp(buf, "\x01", 1));
+
 	cl_assert(git_encode_varint(buf, 100, 267869656) == 4);
 	cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4));
 
diff --git a/tests/index/version.c b/tests/index/version.c
index 3fd240d..7ada302 100644
--- a/tests/index/version.c
+++ b/tests/index/version.c
@@ -3,39 +3,135 @@
 
 static git_repository *g_repo = NULL;
 
+void test_index_version__cleanup(void)
+{
+        cl_git_sandbox_cleanup();
+        g_repo = NULL;
+}
+
+void test_index_version__can_read_v4(void)
+{
+	const char *paths[] = {
+	    "file.tx", "file.txt", "file.txz", "foo", "zzz",
+	};
+	git_index *index;
+	size_t i;
+
+	g_repo = cl_git_sandbox_init("indexv4");
+
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_assert_equal_sz(git_index_entrycount(index), 5);
+
+	for (i = 0; i < ARRAY_SIZE(paths); i++) {
+		const git_index_entry *entry =
+		    git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL);
+
+		cl_assert(entry != NULL);
+	}
+
+	git_index_free(index);
+}
+
 void test_index_version__can_write_v4(void)
 {
+	const char *paths[] = {
+	    "foo",
+	    "foox",
+	    "foobar",
+	    "foobal",
+	    "x",
+	    "xz",
+	    "xyzzyx"
+	};
+	git_index_entry entry;
 	git_index *index;
-	const git_index_entry *entry;
+	size_t i;
 
-	g_repo = cl_git_sandbox_init("filemodes");
+	g_repo = cl_git_sandbox_init("empty_standard_repo");
 	cl_git_pass(git_repository_index(&index, g_repo));
-
-	cl_assert(index->on_disk);
-	cl_assert(git_index_version(index) == 2);
-
-	cl_assert(git_index_entrycount(index) == 6);
-
 	cl_git_pass(git_index_set_version(index, 4));
 
+	for (i = 0; i < ARRAY_SIZE(paths); i++) {
+		memset(&entry, 0, sizeof(entry));
+		entry.path = paths[i];
+		entry.mode = GIT_FILEMODE_BLOB;
+		cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i],
+						     strlen(paths[i]) + 1));
+	}
+	cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths));
+
 	cl_git_pass(git_index_write(index));
 	git_index_free(index);
 
 	cl_git_pass(git_repository_index(&index, g_repo));
 	cl_assert(git_index_version(index) == 4);
 
-	entry = git_index_get_bypath(index, "exec_off", 0);
-	cl_assert(entry);
-	entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
-	cl_assert(entry);
-	entry = git_index_get_bypath(index, "exec_on", 0);
-	cl_assert(entry);
+	for (i = 0; i < ARRAY_SIZE(paths); i++) {
+		const git_index_entry *e;
+
+		cl_assert(e = git_index_get_bypath(index, paths[i], 0));
+		cl_assert_equal_s(paths[i], e->path);
+	}
 
 	git_index_free(index);
 }
 
-void test_index_version__cleanup(void)
+void test_index_version__v4_uses_path_compression(void)
 {
-        cl_git_sandbox_cleanup();
-        g_repo = NULL;
+	git_index_entry entry;
+	git_index *index;
+	char path[250], buf[1];
+	struct stat st;
+	char i, j;
+
+	memset(path, 'a', sizeof(path));
+	memset(buf, 'a', sizeof(buf));
+
+	memset(&entry, 0, sizeof(entry));
+	entry.path = path;
+	entry.mode = GIT_FILEMODE_BLOB;
+
+	g_repo = cl_git_sandbox_init("indexv4");
+	cl_git_pass(git_repository_index(&index, g_repo));
+
+	/* write 676 paths of 250 bytes length */
+	for (i = 'a'; i <= 'z'; i++) {
+		for (j = 'a'; j < 'z'; j++) {
+			path[ARRAY_SIZE(path) - 3] = i;
+			path[ARRAY_SIZE(path) - 2] = j;
+			path[ARRAY_SIZE(path) - 1] = '\0';
+			cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf)));
+		}
+	}
+
+	cl_git_pass(git_index_write(index));
+	cl_git_pass(p_stat(git_index_path(index), &st));
+
+	/*
+	 * Without path compression, the written paths would at
+	 * least take
+	 *
+	 *    (entries * pathlen) = len
+	 *    (676 * 250) = 169000
+	 *
+	 *  bytes. As index v4 uses suffix-compression and our
+	 *  written paths only differ in the last two entries,
+	 *  this number will be much smaller, e.g.
+	 *
+	 *    (1 * pathlen) + (675 * 2) = len
+	 *    676 + 1350 = 2026
+	 *
+	 *    bytes.
+	 *
+	 *    Note that the above calculations do not include
+	 *    additional metadata of the index, e.g. OIDs or
+	 *    index extensions. Including those we get an index
+	 *    of approx. 200kB without compression and 40kB with
+	 *    compression. As this is a lot smaller than without
+	 *    compression, we can verify that path compression is
+	 *    used.
+	 */
+	cl_assert_(st.st_size < 75000, "path compression not enabled");
+
+	git_index_free(index);
 }
diff --git a/tests/resources/indexv4/.gitted/HEAD b/tests/resources/indexv4/.gitted/HEAD
new file mode 100644
index 0000000..cb089cd
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/tests/resources/indexv4/.gitted/config b/tests/resources/indexv4/.gitted/config
new file mode 100644
index 0000000..515f483
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/config
@@ -0,0 +1,5 @@
+[core]
+	repositoryformatversion = 0
+	filemode = true
+	bare = false
+	logallrefupdates = true
diff --git a/tests/resources/indexv4/.gitted/index b/tests/resources/indexv4/.gitted/index
new file mode 100644
index 0000000..e8fc617
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/index
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99
new file mode 100644
index 0000000..cedd594
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7
new file mode 100644
index 0000000..0ddc1d1
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
new file mode 100644
index 0000000..7112238
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/refs/heads/master b/tests/resources/indexv4/.gitted/refs/heads/master
new file mode 100644
index 0000000..f3e960e
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/refs/heads/master
@@ -0,0 +1 @@
+b0952dbb50bed5f01e03e31b296184cb183e54a7
diff --git a/tests/resources/indexv4/file.tx b/tests/resources/indexv4/file.tx
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/resources/indexv4/file.tx
diff --git a/tests/resources/indexv4/file.txt b/tests/resources/indexv4/file.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/resources/indexv4/file.txt
diff --git a/tests/resources/indexv4/file.txz b/tests/resources/indexv4/file.txz
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/resources/indexv4/file.txz
diff --git a/tests/resources/indexv4/foo b/tests/resources/indexv4/foo
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/resources/indexv4/foo
diff --git a/tests/resources/indexv4/zzz b/tests/resources/indexv4/zzz
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/resources/indexv4/zzz