Add git_diff_commit and last-changed example

This adds a new diff API `git_diff_commit` which makes it easy to
generate a `git_diff` object that represents the changes in a given
commit.  It follows the core Git rules for considering changes in
a merge commit - i.e. if a file has an exact match in any parent
of the commit, then the file is considered unmodified by the merge.

This also adds a new example program "last-changed" which takes a
list of filenames and for each one displays the SHA of the last
commit that changed that file.
diff --git a/examples/Makefile b/examples/Makefile
index e866b7f..3d6da7f 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -3,7 +3,7 @@
 CC = gcc
 CFLAGS = -g -I../include -I../src -Wall -Wextra -Wmissing-prototypes -Wno-missing-field-initializers
 LFLAGS = -L../build -lgit2 -lz
-APPS = general showindex diff rev-list cat-file status log rev-parse init blame tag
+APPS = general showindex diff rev-list cat-file status log rev-parse init blame tag last-changed
 
 all: $(APPS)
 
diff --git a/examples/last-changed.c b/examples/last-changed.c
new file mode 100644
index 0000000..fd9d278
--- /dev/null
+++ b/examples/last-changed.c
@@ -0,0 +1,138 @@
+/*
+ * libgit2 "last-changed" example - get last commit modifying a file
+ *
+ * Written by the libgit2 contributors
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication along
+ * with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include "common.h"
+
+static void usage(void)
+{
+	fprintf(stderr, "usage: last-changed [--git-dir=DIR] pathname ...\n");
+	exit(1);
+}
+
+static int mark_pathspec_match(
+	const git_diff *, const git_diff_delta *, const char *, void *);
+
+typedef struct {
+	git_diff_options opts;
+	git_oid oid;
+	char str[GIT_OID_HEXSZ + 1];
+} change_info;
+
+int main(int argc, char *argv[])
+{
+	const char *repodir = ".";
+	change_info info = { GIT_DIFF_OPTIONS_INIT };
+	int start_pathspec = 1;
+	size_t i;
+	git_repository *repo;
+	git_revwalk *walker;
+
+	git_threads_init();
+
+	/* allow you to specific a git repo other than the current one */
+	if (argc > 1 && !strncmp(argv[1], "--git-dir=", strlen("--git-dir="))) {
+		repodir = argv[1] + strlen("--git-dir=");
+		start_pathspec++;
+	}
+
+	/* convert arguments to a "pathspec" of interesting files */
+	info.opts.pathspec.strings = &argv[start_pathspec];
+	info.opts.pathspec.count   = argc - start_pathspec;
+	if (!info.opts.pathspec.count)
+		usage();
+	info.opts.ignore_submodules = GIT_SUBMODULE_IGNORE_DIRTY |
+		GIT_DIFF_DISABLE_PATHSPEC_MATCH;
+	info.opts.notify_cb = mark_pathspec_match;
+	info.opts.notify_payload = &info;
+
+	/* create repo and walker */
+	check_lg2(git_repository_open_ext(&repo, repodir, 0, NULL),
+		"Could not open repository", repodir);
+	check_lg2(git_revwalk_new(&walker, repo),
+		"Could not create revision walker", NULL);
+
+	/* start at HEAD and walk backwards through time */
+	git_revwalk_sorting(walker, GIT_SORT_TOPOLOGICAL | GIT_SORT_TIME);
+	check_lg2(git_revwalk_push_head(walker),
+		"Could not find repository HEAD", NULL);
+
+	while (info.opts.pathspec.count > 0 &&
+		   !git_revwalk_next(&info.oid, walker))
+	{
+		git_commit *commit;
+		git_diff *diff;
+
+		git_oid_tostr(info.str, sizeof(info.str), &info.oid);
+
+		check_lg2(git_commit_lookup(&commit, repo, &info.oid),
+			"Failed to look up commit", NULL);
+		check_lg2(git_diff_commit(&diff, commit, &info.opts),
+			"Failed to get diff for commit", NULL);
+
+		/* notification callback will take care of reporting on
+		 * items in the diff and reducing the pathspec count
+		 */
+
+		git_diff_free(diff);
+		git_commit_free(commit);
+	}
+
+	for (i = 0; i < info.opts.pathspec.count; ++i) {
+		const char *path = info.opts.pathspec.strings[i];
+		if (path)
+			printf("never found %s\n", path);
+	}
+
+	git_revwalk_free(walker);
+	git_repository_free(repo);
+	git_threads_shutdown();
+
+	return 0;
+}
+
+static int mark_pathspec_match(
+	const git_diff *diff_so_far,
+	const git_diff_delta *delta_to_add,
+	const char *matched_pathspec,
+	void *payload)
+{
+	change_info *info = payload;
+	git_strarray *paths = &info->opts.pathspec;
+	size_t i;
+	int found = 0;
+
+	(void)diff_so_far; (void)delta_to_add;
+
+	for (i = 0; i < paths->count; ++i) {
+		/* remove matched item from list */
+		if (found)
+			paths->strings[i - 1] = paths->strings[i];
+		else if (!strcmp(paths->strings[i], matched_pathspec))
+			found = 1;
+	}
+
+	if (found) {
+		const char *verb = "modified";
+		if (delta_to_add->status == GIT_DELTA_ADDED)
+			verb = "added";
+		else if (delta_to_add->status == GIT_DELTA_DELETED)
+			verb = "deleted";
+
+		printf("%s has %s %s\n", info->str, verb, matched_pathspec);
+
+		paths->count--;
+	}
+
+	return 0;
+}
diff --git a/include/git2/diff.h b/include/git2/diff.h
index a0cfbc9..450fc3c 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -775,6 +775,25 @@
 	const git_diff_options *opts); /**< can be NULL for defaults */
 
 /**
+ * Create a diff of the changes between a commit and its parent(s).
+ *
+ * This generates a diff list between a commit and its parents.  For a
+ * non-merge commit, this is simply a shortcut for `git_diff_tree_to_tree`
+ * between the commit tree and the parent tree.  For a parent-less commit,
+ * this will generate a diff with the content of the commit tree.  For a
+ * merge commit, this generates a diff that contains only files that do
+ * not match any of the parents' trees.
+ *
+ * @param diff A pointer to a git_diff pointer that will be allocated.
+ * @param commit A git_commit object to diff from
+ * @param opts Structure with options to influence diff or NULL for defaults.
+ */
+GIT_EXTERN(int) git_diff_commit(
+	git_diff **diff,
+	git_commit *commit,
+	const git_diff_options *opts); /**< can be NULL for defaults */
+
+/**
  * Merge one diff into another.
  *
  * This merges items from the "from" list into the "onto" list.  The
diff --git a/src/diff.c b/src/diff.c
index 0d1aed4..c4e8141 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -20,16 +20,29 @@
 #define DIFF_FLAG_SET(DIFF,FLAG,VAL) (DIFF)->opts.flags = \
 	(VAL) ? ((DIFF)->opts.flags | (FLAG)) : ((DIFF)->opts.flags & ~(VAL))
 
+typedef struct {
+	git_diff_delta base;
+	git_diff_file  extra[GIT_FLEX_ARRAY];
+} git_diff_delta_merged;
+
 static git_diff_delta *diff_delta__alloc(
 	git_diff *diff,
 	git_delta_t status,
-	const char *path)
+	const git_index_entry *entry,
+	uint16_t nfiles)
 {
-	git_diff_delta *delta = git__calloc(1, sizeof(git_diff_delta));
+	git_diff_delta *delta;
+	size_t alloc_size = sizeof(git_diff_delta);
+
+	alloc_size = (nfiles > 2) ?
+		sizeof(git_diff_delta_merged) + sizeof(git_diff_file) * (nfiles - 2) :
+		sizeof(git_diff_delta);
+
+	delta = git__calloc(1, alloc_size);
 	if (!delta)
 		return NULL;
 
-	delta->old_file.path = git_pool_strdup(&diff->pool, path);
+	delta->old_file.path = git_pool_strdup(&diff->pool, entry->path);
 	if (delta->old_file.path == NULL) {
 		git__free(delta);
 		return NULL;
@@ -45,6 +58,26 @@
 		}
 	}
 	delta->status = status;
+	delta->nfiles = nfiles;
+
+	if (delta->status == GIT_DELTA_ADDED ||
+		delta->status == GIT_DELTA_IGNORED ||
+		delta->status == GIT_DELTA_UNTRACKED)
+	{
+		delta->new_file.mode = entry->mode;
+		delta->new_file.size = entry->file_size;
+		git_oid_cpy(&delta->new_file.id, &entry->id);
+	} else {
+		delta->old_file.mode = entry->mode;
+		delta->old_file.size = entry->file_size;
+		git_oid_cpy(&delta->old_file.id, &entry->id);
+	}
+
+	delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID;
+
+	if (delta->status == GIT_DELTA_DELETED ||
+		!git_oid_iszero(&delta->new_file.id))
+		delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 
 	return delta;
 }
@@ -100,28 +133,11 @@
 			&matched_pathspec, NULL))
 		return 0;
 
-	delta = diff_delta__alloc(diff, status, entry->path);
-	GITERR_CHECK_ALLOC(delta);
-
 	/* This fn is just for single-sided diffs */
 	assert(status != GIT_DELTA_MODIFIED);
-	delta->nfiles = 1;
 
-	if (delta->status == GIT_DELTA_DELETED) {
-		delta->old_file.mode = entry->mode;
-		delta->old_file.size = entry->file_size;
-		git_oid_cpy(&delta->old_file.id, &entry->id);
-	} else /* ADDED, IGNORED, UNTRACKED */ {
-		delta->new_file.mode = entry->mode;
-		delta->new_file.size = entry->file_size;
-		git_oid_cpy(&delta->new_file.id, &entry->id);
-	}
-
-	delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID;
-
-	if (delta->status == GIT_DELTA_DELETED ||
-		!git_oid_iszero(&delta->new_file.id))
-		delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
+	delta = diff_delta__alloc(diff, status, entry, 1);
+	GITERR_CHECK_ALLOC(delta);
 
 	return diff_insert_delta(diff, delta, matched_pathspec);
 }
@@ -137,7 +153,6 @@
 	const char *matched_pathspec)
 {
 	git_diff_delta *delta;
-	const char *canonical_path = old_entry->path;
 
 	if (status == GIT_DELTA_UNMODIFIED &&
 		DIFF_FLAG_ISNT_SET(diff, GIT_DIFF_INCLUDE_UNMODIFIED))
@@ -152,14 +167,8 @@
 		new_mode = temp_mode;
 	}
 
-	delta = diff_delta__alloc(diff, status, canonical_path);
+	delta = diff_delta__alloc(diff, status, old_entry, 2);
 	GITERR_CHECK_ALLOC(delta);
-	delta->nfiles = 2;
-
-	git_oid_cpy(&delta->old_file.id, &old_entry->id);
-	delta->old_file.size = old_entry->file_size;
-	delta->old_file.mode = old_mode;
-	delta->old_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 
 	git_oid_cpy(&delta->new_file.id, &new_entry->id);
 	delta->new_file.size = new_entry->file_size;
@@ -1669,3 +1678,212 @@
 		return 0;
 	}
 }
+
+static int diff_commit_nonmerge(
+	git_diff **diff_ptr,
+	git_commit *commit,
+	const git_diff_options *opts)
+{
+	int error = 0;
+	git_tree *tree = NULL, *parent_tree = NULL;
+
+	error = git_commit_tree(&tree, commit);
+
+	if (!error && git_commit_parentcount(commit) > 0) {
+		git_commit *parent = NULL;
+
+		if (!(error = git_commit_parent(&parent, commit, 0)))
+			error = git_commit_tree(&parent_tree, parent);
+
+		git_commit_free(parent);
+	}
+
+	if (!error)
+		error = git_diff_tree_to_tree(
+			diff_ptr, git_commit_owner(commit), parent_tree, tree, opts);
+
+	git_tree_free(parent_tree);
+	git_tree_free(tree);
+
+	return error;
+}
+
+static void free_commit_parent_trees_array(git_tree **parents)
+{
+	unsigned int p;
+	for (p = 0; parents[p] != NULL; ++p)
+		git_tree_free(parents[p]);
+	git__free(parents);
+}
+
+static int alloc_commit_parent_trees_array(
+	git_tree ***parents_ptr, git_commit *commit)
+{
+	int error = 0;
+	unsigned int p, nparents = git_commit_parentcount(commit);
+	git_tree **parents;
+
+	/* add extra element to give final NULL at end of array */
+	parents = git__calloc(nparents + 1, sizeof(git_tree *));
+	GITERR_CHECK_ALLOC(parents);
+
+	for (p = 0; p < nparents; ++p) {
+		git_commit *parent = NULL;
+
+		if (!(error = git_commit_parent(&parent, commit, p)))
+			error = git_commit_tree(&parents[p], parent);
+		git_commit_free(parent);
+
+		if (error < 0) {
+			free_commit_parent_trees_array(parents);
+			return error;
+		}
+	}
+
+	*parents_ptr = parents;
+	return 0;
+}
+
+static int match_parent_tree_entries(
+	unsigned int nparents, git_tree **parents, const git_index_entry *item)
+{
+	int error = GIT_ENOTFOUND;
+	unsigned int p;
+	git_tree_entry *te = NULL;
+
+	for (p = 0; error < 0 && p < nparents; ++p) {
+		if (git_tree_entry_bypath(&te, parents[p], item->path))
+			giterr_clear();
+		else {
+			error = git_oid_equal(git_tree_entry_id(te), &item->id) ?
+				(int)p : -1;
+			git_tree_entry_free(te);
+		}
+	}
+
+	return error;
+}
+
+int git_diff_commit(
+	git_diff **diff_ptr,
+	git_commit *commit,
+	const git_diff_options *opts)
+{
+	int error = 0;
+	unsigned int nparents = 0, p;
+	git_tree *tree = NULL, **parents = NULL;
+	git_iterator_flag_t iflag = GIT_ITERATOR_DONT_IGNORE_CASE;
+	char *pfx = NULL;
+	git_iterator *iter = NULL;
+	const git_index_entry *nitem;
+	git_diff *diff = NULL;
+
+	assert(diff_ptr && commit);
+	*diff_ptr = NULL;
+
+	/* this diff is a little bit different because if the commit has
+	 * multiple parents, this actually only wants to include files that
+	 * are do not match any of the parent trees.
+	 */
+
+	nparents = git_commit_parentcount(commit);
+	if (nparents <= 1)
+		return diff_commit_nonmerge(diff_ptr, commit, opts);
+
+	if ((error = git_commit_tree(&tree, commit)) < 0 ||
+		(error = alloc_commit_parent_trees_array(&parents, commit)) < 0)
+		goto cleanup;
+
+	/* now iterate over tree and look for each entry in all of parents */
+
+	if (opts) {
+		pfx = git_pathspec_prefix(&opts->pathspec);
+
+		if ((opts->flags & GIT_DIFF_IGNORE_CASE) != 0)
+			iflag = GIT_ITERATOR_IGNORE_CASE;
+	}
+
+	if ((error = git_iterator_for_tree(&iter, tree, iflag, pfx, pfx)) < 0)
+		goto cleanup;
+
+	if (!(diff = diff_list_alloc(git_commit_owner(commit), iter, iter)))
+		error = -1;
+	else
+		error = diff_list_apply_options(diff, opts);
+
+	while (!error && !(error = git_iterator_advance(&nitem, iter))) {
+		const char *matched_pathspec;
+		int matched_parent;
+		git_diff_delta *delta;
+
+		if (!git_pathspec__match(
+				&diff->pathspec, nitem->path,
+				DIFF_FLAG_IS_SET(diff, GIT_DIFF_DISABLE_PATHSPEC_MATCH),
+				DIFF_FLAG_IS_SET(diff, GIT_DIFF_IGNORE_CASE),
+				&matched_pathspec, NULL))
+			continue;
+
+		matched_parent = match_parent_tree_entries(nparents, parents, nitem);
+
+		/* don't record a diff if we found a parent that matched exactly */
+		if (matched_parent >= 0)
+			continue;
+
+		/* create a specialized delta */
+		delta = diff_delta__alloc(diff, GIT_DELTA_ADDED, nitem, nparents + 1);
+		if (!delta) {
+			error = -1;
+			break;
+		}
+
+		/* add other sides of delta as needed */
+		if (matched_parent != GIT_ENOTFOUND) {
+			git_tree_entry *te = NULL;
+			git_diff_file *f;
+
+			for (p = 0; error < 0 && p < nparents; ++p) {
+				if (!p)
+					f = (delta->status == GIT_DELTA_ADDED) ?
+						&delta->old_file : &delta->new_file;
+				else
+					f = &((git_diff_delta_merged *)delta)->extra[p - 1];
+
+				if (git_tree_entry_bypath(&te, parents[p], nitem->path)) {
+					giterr_clear();
+					f->flags |= GIT_DIFF_FLAG_VALID_ID;
+				} else {
+					f->mode = git_tree_entry_filemode(te);
+					f->size = 0;
+					git_oid_cpy(&f->id, git_tree_entry_id(te));
+					git_tree_entry_free(te);
+				}
+			}
+
+			delta->status = GIT_DELTA_MODIFIED;
+		}
+
+		error = diff_insert_delta(diff, delta, matched_pathspec);
+	}
+
+	if (error == GIT_ITEROVER)
+		error = 0;
+
+	/* TODO:
+	 * - walk all parents to look for DELETED items
+	 * - resort deltas after adding those
+	 */
+
+cleanup:
+	git_iterator_free(iter);
+	git__free(pfx);
+	git_tree_free(tree);
+	free_commit_parent_trees_array(parents);
+
+	if (!error)
+		*diff_ptr = diff;
+	else
+		git_diff_free(diff);
+
+	return error;
+}
+