| /* |
| * Copyright (C) the libgit2 contributors. All rights reserved. |
| * |
| * This file is part of libgit2, distributed under the GNU GPL v2 with |
| * a Linking Exception. For full terms see the included COPYING file. |
| */ |
| |
| #include "blame_git.h" |
| #include "commit.h" |
| #include "blob.h" |
| #include "xdiff/xinclude.h" |
| #include "diff_xdiff.h" |
| |
| /* |
| * Origin is refcounted and usually we keep the blob contents to be |
| * reused. |
| */ |
| static git_blame__origin *origin_incref(git_blame__origin *o) |
| { |
| if (o) |
| o->refcnt++; |
| return o; |
| } |
| |
| static void origin_decref(git_blame__origin *o) |
| { |
| if (o && --o->refcnt <= 0) { |
| if (o->previous) |
| origin_decref(o->previous); |
| git_blob_free(o->blob); |
| git_commit_free(o->commit); |
| git__free(o); |
| } |
| } |
| |
| /* Given a commit and a path in it, create a new origin structure. */ |
| static int make_origin(git_blame__origin **out, git_commit *commit, const char *path) |
| { |
| git_blame__origin *o; |
| size_t path_len = strlen(path), alloc_len; |
| int error = 0; |
| |
| GITERR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*o), path_len); |
| GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 1); |
| o = git__calloc(1, alloc_len); |
| GITERR_CHECK_ALLOC(o); |
| |
| o->commit = commit; |
| o->refcnt = 1; |
| strcpy(o->path, path); |
| |
| if (!(error = git_object_lookup_bypath((git_object**)&o->blob, (git_object*)commit, |
| path, GIT_OBJ_BLOB))) { |
| *out = o; |
| } else { |
| origin_decref(o); |
| } |
| return error; |
| } |
| |
| /* Locate an existing origin or create a new one. */ |
| int git_blame__get_origin( |
| git_blame__origin **out, |
| git_blame *blame, |
| git_commit *commit, |
| const char *path) |
| { |
| git_blame__entry *e; |
| |
| for (e = blame->ent; e; e = e->next) { |
| if (e->suspect->commit == commit && !strcmp(e->suspect->path, path)) { |
| *out = origin_incref(e->suspect); |
| } |
| } |
| return make_origin(out, commit, path); |
| } |
| |
| typedef struct blame_chunk_cb_data { |
| git_blame *blame; |
| git_blame__origin *target; |
| git_blame__origin *parent; |
| long tlno; |
| long plno; |
| }blame_chunk_cb_data; |
| |
| static bool same_suspect(git_blame__origin *a, git_blame__origin *b) |
| { |
| if (a == b) |
| return true; |
| if (git_oid_cmp(git_commit_id(a->commit), git_commit_id(b->commit))) |
| return false; |
| return 0 == strcmp(a->path, b->path); |
| } |
| |
| /* find the line number of the last line the target is suspected for */ |
| static bool find_last_in_target(size_t *out, git_blame *blame, git_blame__origin *target) |
| { |
| git_blame__entry *e; |
| size_t last_in_target = 0; |
| bool found = false; |
| |
| *out = 0; |
| |
| for (e=blame->ent; e; e=e->next) { |
| if (e->guilty || !same_suspect(e->suspect, target)) |
| continue; |
| if (last_in_target < e->s_lno + e->num_lines) { |
| found = true; |
| last_in_target = e->s_lno + e->num_lines; |
| } |
| } |
| |
| *out = last_in_target; |
| return found; |
| } |
| |
| /* |
| * It is known that lines between tlno to same came from parent, and e |
| * has an overlap with that range. it also is known that parent's |
| * line plno corresponds to e's line tlno. |
| * |
| * <---- e -----> |
| * <------> (entirely within) |
| * <------------> (extends past) |
| * <------------> (starts before) |
| * <------------------> (entirely encloses) |
| * |
| * Split e into potentially three parts; before this chunk, the chunk |
| * to be blamed for the parent, and after that portion. |
| */ |
| static void split_overlap(git_blame__entry *split, git_blame__entry *e, |
| size_t tlno, size_t plno, size_t same, git_blame__origin *parent) |
| { |
| size_t chunk_end_lno; |
| |
| if (e->s_lno < tlno) { |
| /* there is a pre-chunk part not blamed on the parent */ |
| split[0].suspect = origin_incref(e->suspect); |
| split[0].lno = e->lno; |
| split[0].s_lno = e->s_lno; |
| split[0].num_lines = tlno - e->s_lno; |
| split[1].lno = e->lno + tlno - e->s_lno; |
| split[1].s_lno = plno; |
| } else { |
| split[1].lno = e->lno; |
| split[1].s_lno = plno + (e->s_lno - tlno); |
| } |
| |
| if (same < e->s_lno + e->num_lines) { |
| /* there is a post-chunk part not blamed on parent */ |
| split[2].suspect = origin_incref(e->suspect); |
| split[2].lno = e->lno + (same - e->s_lno); |
| split[2].s_lno = e->s_lno + (same - e->s_lno); |
| split[2].num_lines = e->s_lno + e->num_lines - same; |
| chunk_end_lno = split[2].lno; |
| } else { |
| chunk_end_lno = e->lno + e->num_lines; |
| } |
| split[1].num_lines = chunk_end_lno - split[1].lno; |
| |
| /* |
| * if it turns out there is nothing to blame the parent for, forget about |
| * the splitting. !split[1].suspect signals this. |
| */ |
| if (split[1].num_lines < 1) |
| return; |
| split[1].suspect = origin_incref(parent); |
| } |
| |
| /* |
| * Link in a new blame entry to the scoreboard. Entries that cover the same |
| * line range have been removed from the scoreboard previously. |
| */ |
| static void add_blame_entry(git_blame *blame, git_blame__entry *e) |
| { |
| git_blame__entry *ent, *prev = NULL; |
| |
| origin_incref(e->suspect); |
| |
| for (ent = blame->ent; ent && ent->lno < e->lno; ent = ent->next) |
| prev = ent; |
| |
| /* prev, if not NULL, is the last one that is below e */ |
| e->prev = prev; |
| if (prev) { |
| e->next = prev->next; |
| prev->next = e; |
| } else { |
| e->next = blame->ent; |
| blame->ent = e; |
| } |
| if (e->next) |
| e->next->prev = e; |
| } |
| |
| /* |
| * src typically is on-stack; we want to copy the information in it to |
| * a malloced blame_entry that is already on the linked list of the scoreboard. |
| * The origin of dst loses a refcnt while the origin of src gains one. |
| */ |
| static void dup_entry(git_blame__entry *dst, git_blame__entry *src) |
| { |
| git_blame__entry *p, *n; |
| |
| p = dst->prev; |
| n = dst->next; |
| origin_incref(src->suspect); |
| origin_decref(dst->suspect); |
| memcpy(dst, src, sizeof(*src)); |
| dst->prev = p; |
| dst->next = n; |
| dst->score = 0; |
| } |
| |
| /* |
| * split_overlap() divided an existing blame e into up to three parts in split. |
| * Adjust the linked list of blames in the scoreboard to reflect the split. |
| */ |
| static void split_blame(git_blame *blame, git_blame__entry *split, git_blame__entry *e) |
| { |
| git_blame__entry *new_entry; |
| |
| if (split[0].suspect && split[2].suspect) { |
| /* The first part (reuse storage for the existing entry e */ |
| dup_entry(e, &split[0]); |
| |
| /* The last part -- me */ |
| new_entry = git__malloc(sizeof(*new_entry)); |
| memcpy(new_entry, &(split[2]), sizeof(git_blame__entry)); |
| add_blame_entry(blame, new_entry); |
| |
| /* ... and the middle part -- parent */ |
| new_entry = git__malloc(sizeof(*new_entry)); |
| memcpy(new_entry, &(split[1]), sizeof(git_blame__entry)); |
| add_blame_entry(blame, new_entry); |
| } else if (!split[0].suspect && !split[2].suspect) { |
| /* |
| * The parent covers the entire area; reuse storage for e and replace it |
| * with the parent |
| */ |
| dup_entry(e, &split[1]); |
| } else if (split[0].suspect) { |
| /* me and then parent */ |
| dup_entry(e, &split[0]); |
| new_entry = git__malloc(sizeof(*new_entry)); |
| memcpy(new_entry, &(split[1]), sizeof(git_blame__entry)); |
| add_blame_entry(blame, new_entry); |
| } else { |
| /* parent and then me */ |
| dup_entry(e, &split[1]); |
| new_entry = git__malloc(sizeof(*new_entry)); |
| memcpy(new_entry, &(split[2]), sizeof(git_blame__entry)); |
| add_blame_entry(blame, new_entry); |
| } |
| } |
| |
| /* |
| * After splitting the blame, the origins used by the on-stack blame_entry |
| * should lose one refcnt each. |
| */ |
| static void decref_split(git_blame__entry *split) |
| { |
| int i; |
| for (i=0; i<3; i++) |
| origin_decref(split[i].suspect); |
| } |
| |
| /* |
| * Helper for blame_chunk(). blame_entry e is known to overlap with the patch |
| * hunk; split it and pass blame to the parent. |
| */ |
| static void blame_overlap( |
| git_blame *blame, |
| git_blame__entry *e, |
| size_t tlno, |
| size_t plno, |
| size_t same, |
| git_blame__origin *parent) |
| { |
| git_blame__entry split[3] = {{0}}; |
| |
| split_overlap(split, e, tlno, plno, same, parent); |
| if (split[1].suspect) |
| split_blame(blame, split, e); |
| decref_split(split); |
| } |
| |
| /* |
| * Process one hunk from the patch between the current suspect for blame_entry |
| * e and its parent. Find and split the overlap, and pass blame to the |
| * overlapping part to the parent. |
| */ |
| static void blame_chunk( |
| git_blame *blame, |
| size_t tlno, |
| size_t plno, |
| size_t same, |
| git_blame__origin *target, |
| git_blame__origin *parent) |
| { |
| git_blame__entry *e; |
| |
| for (e = blame->ent; e; e = e->next) { |
| if (e->guilty || !same_suspect(e->suspect, target)) |
| continue; |
| if (same <= e->s_lno) |
| continue; |
| if (tlno < e->s_lno + e->num_lines) { |
| blame_overlap(blame, e, tlno, plno, same, parent); |
| } |
| } |
| } |
| |
| static int my_emit( |
| long start_a, long count_a, |
| long start_b, long count_b, |
| void *cb_data) |
| { |
| blame_chunk_cb_data *d = (blame_chunk_cb_data *)cb_data; |
| |
| blame_chunk(d->blame, d->tlno, d->plno, start_b, d->target, d->parent); |
| d->plno = start_a + count_a; |
| d->tlno = start_b + count_b; |
| |
| return 0; |
| } |
| |
| static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx) |
| { |
| const int blk = 1024; |
| long trimmed = 0, recovered = 0; |
| char *ap = a->ptr + a->size; |
| char *bp = b->ptr + b->size; |
| long smaller = (long)((a->size < b->size) ? a->size : b->size); |
| |
| if (ctx) |
| return; |
| |
| while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) { |
| trimmed += blk; |
| ap -= blk; |
| bp -= blk; |
| } |
| |
| while (recovered < trimmed) |
| if (ap[recovered++] == '\n') |
| break; |
| a->size -= trimmed - recovered; |
| b->size -= trimmed - recovered; |
| } |
| |
| static int diff_hunks(mmfile_t file_a, mmfile_t file_b, void *cb_data) |
| { |
| xpparam_t xpp = {0}; |
| xdemitconf_t xecfg = {0}; |
| xdemitcb_t ecb = {0}; |
| |
| xecfg.hunk_func = my_emit; |
| ecb.priv = cb_data; |
| |
| trim_common_tail(&file_a, &file_b, 0); |
| |
| if (file_a.size > GIT_XDIFF_MAX_SIZE || |
| file_b.size > GIT_XDIFF_MAX_SIZE) { |
| giterr_set(GITERR_INVALID, "file too large to blame"); |
| return -1; |
| } |
| |
| return xdl_diff(&file_a, &file_b, &xpp, &xecfg, &ecb); |
| } |
| |
| static void fill_origin_blob(git_blame__origin *o, mmfile_t *file) |
| { |
| memset(file, 0, sizeof(*file)); |
| if (o->blob) { |
| file->ptr = (char*)git_blob_rawcontent(o->blob); |
| file->size = (size_t)git_blob_rawsize(o->blob); |
| } |
| } |
| |
| static int pass_blame_to_parent( |
| git_blame *blame, |
| git_blame__origin *target, |
| git_blame__origin *parent) |
| { |
| size_t last_in_target; |
| mmfile_t file_p, file_o; |
| blame_chunk_cb_data d = { blame, target, parent, 0, 0 }; |
| |
| if (!find_last_in_target(&last_in_target, blame, target)) |
| return 1; /* nothing remains for this target */ |
| |
| fill_origin_blob(parent, &file_p); |
| fill_origin_blob(target, &file_o); |
| |
| if (diff_hunks(file_p, file_o, &d) < 0) |
| return -1; |
| |
| /* The reset (i.e. anything after tlno) are the same as the parent */ |
| blame_chunk(blame, d.tlno, d.plno, last_in_target, target, parent); |
| |
| return 0; |
| } |
| |
| static int paths_on_dup(void **old, void *new) |
| { |
| GIT_UNUSED(old); |
| git__free(new); |
| return -1; |
| } |
| |
| static git_blame__origin* find_origin( |
| git_blame *blame, |
| git_commit *parent, |
| git_blame__origin *origin) |
| { |
| git_blame__origin *porigin = NULL; |
| git_diff *difflist = NULL; |
| git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT; |
| git_tree *otree=NULL, *ptree=NULL; |
| |
| /* Get the trees from this commit and its parent */ |
| if (0 != git_commit_tree(&otree, origin->commit) || |
| 0 != git_commit_tree(&ptree, parent)) |
| goto cleanup; |
| |
| /* Configure the diff */ |
| diffopts.context_lines = 0; |
| diffopts.flags = GIT_DIFF_SKIP_BINARY_CHECK; |
| |
| /* Check to see if files we're interested have changed */ |
| diffopts.pathspec.count = blame->paths.length; |
| diffopts.pathspec.strings = (char**)blame->paths.contents; |
| if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts)) |
| goto cleanup; |
| |
| if (!git_diff_num_deltas(difflist)) { |
| /* No changes; copy data */ |
| git_blame__get_origin(&porigin, blame, parent, origin->path); |
| } else { |
| git_diff_find_options findopts = GIT_DIFF_FIND_OPTIONS_INIT; |
| int i; |
| |
| /* Generate a full diff between the two trees */ |
| git_diff_free(difflist); |
| diffopts.pathspec.count = 0; |
| if (0 != git_diff_tree_to_tree(&difflist, blame->repository, ptree, otree, &diffopts)) |
| goto cleanup; |
| |
| /* Let diff find renames */ |
| findopts.flags = GIT_DIFF_FIND_RENAMES; |
| if (0 != git_diff_find_similar(difflist, &findopts)) |
| goto cleanup; |
| |
| /* Find one that matches */ |
| for (i=0; i<(int)git_diff_num_deltas(difflist); i++) { |
| const git_diff_delta *delta = git_diff_get_delta(difflist, i); |
| |
| if (!git_vector_bsearch(NULL, &blame->paths, delta->new_file.path)) |
| { |
| git_vector_insert_sorted(&blame->paths, (void*)git__strdup(delta->old_file.path), |
| paths_on_dup); |
| make_origin(&porigin, parent, delta->old_file.path); |
| } |
| } |
| } |
| |
| cleanup: |
| git_diff_free(difflist); |
| git_tree_free(otree); |
| git_tree_free(ptree); |
| return porigin; |
| } |
| |
| /* |
| * The blobs of origin and porigin exactly match, so everything origin is |
| * suspected for can be blamed on the parent. |
| */ |
| static void pass_whole_blame(git_blame *blame, |
| git_blame__origin *origin, git_blame__origin *porigin) |
| { |
| git_blame__entry *e; |
| |
| if (!porigin->blob) |
| git_object_lookup((git_object**)&porigin->blob, blame->repository, |
| git_blob_id(origin->blob), GIT_OBJ_BLOB); |
| for (e=blame->ent; e; e=e->next) { |
| if (!same_suspect(e->suspect, origin)) |
| continue; |
| origin_incref(porigin); |
| origin_decref(e->suspect); |
| e->suspect = porigin; |
| } |
| } |
| |
| static int pass_blame(git_blame *blame, git_blame__origin *origin, uint32_t opt) |
| { |
| git_commit *commit = origin->commit; |
| int i, num_parents; |
| git_blame__origin *sg_buf[16]; |
| git_blame__origin *porigin, **sg_origin = sg_buf; |
| int ret, error = 0; |
| |
| num_parents = git_commit_parentcount(commit); |
| if (!git_oid_cmp(git_commit_id(commit), &blame->options.oldest_commit)) |
| /* Stop at oldest specified commit */ |
| num_parents = 0; |
| else if (opt & GIT_BLAME_FIRST_PARENT && num_parents > 1) |
| /* Limit search to the first parent */ |
| num_parents = 1; |
| |
| if (!num_parents) { |
| git_oid_cpy(&blame->options.oldest_commit, git_commit_id(commit)); |
| goto finish; |
| } |
| else if (num_parents < (int)ARRAY_SIZE(sg_buf)) |
| memset(sg_buf, 0, sizeof(sg_buf)); |
| else |
| sg_origin = git__calloc(num_parents, sizeof(*sg_origin)); |
| |
| for (i=0; i<num_parents; i++) { |
| git_commit *p; |
| int j, same; |
| |
| if (sg_origin[i]) |
| continue; |
| |
| if ((error = git_commit_parent(&p, origin->commit, i)) < 0) |
| goto finish; |
| porigin = find_origin(blame, p, origin); |
| |
| if (!porigin) |
| continue; |
| if (porigin->blob && origin->blob && |
| !git_oid_cmp(git_blob_id(porigin->blob), git_blob_id(origin->blob))) { |
| pass_whole_blame(blame, origin, porigin); |
| origin_decref(porigin); |
| goto finish; |
| } |
| for (j = same = 0; j<i; j++) |
| if (sg_origin[j] && |
| !git_oid_cmp(git_blob_id(sg_origin[j]->blob), git_blob_id(porigin->blob))) { |
| same = 1; |
| break; |
| } |
| if (!same) |
| sg_origin[i] = porigin; |
| else |
| origin_decref(porigin); |
| } |
| |
| /* Standard blame */ |
| for (i=0; i<num_parents; i++) { |
| git_blame__origin *porigin = sg_origin[i]; |
| if (!porigin) |
| continue; |
| if (!origin->previous) { |
| origin_incref(porigin); |
| origin->previous = porigin; |
| } |
| |
| if ((ret = pass_blame_to_parent(blame, origin, porigin)) != 0) { |
| if (ret < 0) |
| error = -1; |
| |
| goto finish; |
| } |
| } |
| |
| /* TODO: optionally find moves in parents' files */ |
| |
| /* TODO: optionally find copies in parents' files */ |
| |
| finish: |
| for (i=0; i<num_parents; i++) |
| if (sg_origin[i]) |
| origin_decref(sg_origin[i]); |
| if (sg_origin != sg_buf) |
| git__free(sg_origin); |
| return error; |
| } |
| |
| /* |
| * If two blame entries that are next to each other came from |
| * contiguous lines in the same origin (i.e. <commit, path> pair), |
| * merge them together. |
| */ |
| static void coalesce(git_blame *blame) |
| { |
| git_blame__entry *ent, *next; |
| |
| for (ent=blame->ent; ent && (next = ent->next); ent = next) { |
| if (same_suspect(ent->suspect, next->suspect) && |
| ent->guilty == next->guilty && |
| ent->s_lno + ent->num_lines == next->s_lno) |
| { |
| ent->num_lines += next->num_lines; |
| ent->next = next->next; |
| if (ent->next) |
| ent->next->prev = ent; |
| origin_decref(next->suspect); |
| git__free(next); |
| ent->score = 0; |
| next = ent; /* again */ |
| } |
| } |
| } |
| |
| int git_blame__like_git(git_blame *blame, uint32_t opt) |
| { |
| while (true) { |
| git_blame__entry *ent; |
| git_blame__origin *suspect = NULL; |
| |
| /* Find a suspect to break down */ |
| for (ent = blame->ent; !suspect && ent; ent = ent->next) |
| if (!ent->guilty) |
| suspect = ent->suspect; |
| if (!suspect) |
| return 0; /* all done */ |
| |
| /* We'll use this suspect later in the loop, so hold on to it for now. */ |
| origin_incref(suspect); |
| |
| if (pass_blame(blame, suspect, opt) < 0) |
| return -1; |
| |
| /* Take responsibility for the remaining entries */ |
| for (ent = blame->ent; ent; ent = ent->next) { |
| if (same_suspect(ent->suspect, suspect)) { |
| ent->guilty = true; |
| ent->is_boundary = !git_oid_cmp( |
| git_commit_id(suspect->commit), |
| &blame->options.oldest_commit); |
| } |
| } |
| origin_decref(suspect); |
| } |
| |
| coalesce(blame); |
| |
| return 0; |
| } |
| |
| void git_blame__free_entry(git_blame__entry *ent) |
| { |
| if (!ent) return; |
| origin_decref(ent->suspect); |
| git__free(ent); |
| } |