From 521cc7336cc22a35a3e7c991539d7cbdc3a8ef79 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 11 Feb 2008 01:16:24 +0100 Subject: [PATCH] New module 'git-merge-changelog'. --- ChangeLog | 6 + lib/git-merge-changelog.c | 1273 +++++++++++++++++++++++++++++++++++++++++++ modules/git-merge-changelog | 36 ++ 3 files changed, 1315 insertions(+) create mode 100644 lib/git-merge-changelog.c create mode 100644 modules/git-merge-changelog diff --git a/ChangeLog b/ChangeLog index 93fc2ce11..236260db8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2008-02-10 Bruno Haible + + New module 'git-merge-changelog'. + * modules/git-merge-changelog: New file. + * lib/git-merge-changelog.c: New file. + 2008-02-10 Jim Meyering useless-if-before-free: New option: --list (-l). diff --git a/lib/git-merge-changelog.c b/lib/git-merge-changelog.c new file mode 100644 index 000000000..f2220f215 --- /dev/null +++ b/lib/git-merge-changelog.c @@ -0,0 +1,1273 @@ +/* git-merge-changelog - git "merge" driver for GNU style ChangeLog files. + Copyright (C) 2008 Bruno Haible + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* README: + The default merge driver of 'git' *always* produces conflicts when + pulling public modifications into a privately modified ChangeLog file. + This is because ChangeLog files are always modified at the top; the + default merge driver has no clue how to deal with this. Furthermore + the conflicts are presented with more <<<< ==== >>>> markers than + necessary; this is because the default merge driver makes pointless + effects to look at the individual line changes inside a ChangeLog entry. + + This program serves as a 'git' merge driver that avoids these problems. + 1. It produces no conflict when ChangeLog entries have been inserted + at the top both in the public and in the private modification. It + puts the privately added entries above the publicly added entries. + 2. It respects the structure of ChangeLog files: entries are not split + into lines but kept together. + 3. It also handles the case of small modifications of past ChangeLog + entries, or of removed ChangeLog entries: they are merged as one + would expect it. + 4. Conflicts are presented at the top of the file, rather than where + they occurred, so that the user will see them immediately. (Unlike + for source code written in some programming language, conflict markers + that are located several hundreds lines from the top will not cause + any syntax error and therefore would be likely to remain unnoticed.) + */ + +/* Installation: + $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog + $ cd /tmp/testdir123 + $ ./configure + $ make + $ make install + - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines + + [merge "cl-merge"] + name = GNU-style ChangeLog merge driver + driver = /usr/local/bin/git-merge-changelog %O %A %B + + - In every directory that contains a ChangeLog file, add a file + '.gitattributes' with this line: + + ChangeLog merge=cl-merge + + (See "man 5 gitattributes" for more info.) + */ + +/* Calling convention: + A merge driver is called with three filename arguments: + 1. %O = The common ancestor of %A and %B. + 2. %A = The file's contents from the "current branch". + 3. %B = The file's contents from the "other branch"; this is the contents + being merged in. + + In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem + maintainer to a central maintainer): + 2. %A = The file's newest pulled contents; modified by other committers. + 3. %B = The user's newest copy of the file; modified by the user. + In case of a downstream pull (e.g. from a central repository to the user): + 2. %A = The user's newest copy of the file; modified by the user. + 3. %B = The file's newest pulled contents; modified by other committers. + + It should write its merged output into file %A. It can also echo some + remarks to stdout. It should exit with return code 0 if the merge could + be resolved cleanly, or with non-zero return code if there were conflicts. + */ + +/* How it works: + The structure of a ChangeLog file: It consists of ChangeLog entries. A + ChangeLog entry starts at a line following a blank line and that starts with + a non-whitespace character, or at the beginning of a file. + The merge driver works as follows: It reads the three files into memory and + dissects them into ChangeLog entries. It then finds the differences between + %O and %B. They are classified as: + - removals (some consecutive entries removed), + - changes (some consecutive entries removed, some consecutive entries + added), + - additions (some consecutive entries added). + The driver then attempts to apply the changes to %A. + To this effect, it first computes a correspondence between the entries in %O + and the entries in %A, using fuzzy string matching to still identify changed + entries. + - Removals are applied one by one. If the entry is present in %A, at any + position, it is removed. If not, the removal is marked as a conflict. + - Additions at the top of %B are applied at the top of %A. + - Additions between entry x and entry y (y may be the file end) in %B are + applied between entry x and entry y in %A (if they still exist and are + still consecutive in %A), otherwise the additions are marked as a + conflict. + - Changes are categorized into "simple changes": + entry1 ... entryn + are mapped to + added_entry ... added_entry modified_entry1 ... modified_entryn, + where the correspondence between entry_i and modified_entry_i is still + clear; and "big changes": these are all the rest. Simple changes at the + top of %B are applied by putting the added entries at the top of %A. The + changes in simple changes are applied one by one; possibly leading to + single-entry conflicts. Big changes are applied en bloc, possibly + leading to conflicts spanning multiple entries. + - Conflicts are output at the top of the file and cause an exit status of + 1. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "progname.h" +#include "error.h" +#include "read-file.h" +#include "gl_list.h" +#include "gl_array_list.h" +#include "gl_linkedhash_list.h" +#include "gl_linked_list.h" +#include "xalloc.h" +#include "xmalloca.h" +#include "fstrcmp.h" +#include "minmax.h" +#include "fwriteerror.h" + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + abort (); \ + } \ + while (0) + +#define FSTRCMP_THRESHOLD 0.6 + +/* Representation of a ChangeLog entry. + The string may contain NUL bytes; therefore it is represented as a plain + opaque memory region. */ +struct entry +{ + char *string; + size_t length; +}; + +/* Compare two entries for equality. */ +static bool +entry_equals (const void *elt1, const void *elt2) +{ + const struct entry *entry1 = (const struct entry *) elt1; + const struct entry *entry2 = (const struct entry *) elt2; + return entry1->length == entry2->length + && memcmp (entry1->string, entry2->string, entry1->length) == 0; +}; + +/* Return a hash code of the contents of a ChangeLog entry. */ +static size_t +entry_hashcode (const void *elt) +{ + const struct entry *entry = (const struct entry *) elt; + /* See http://www.haible.de/bruno/hashfunc.html. */ + const char *s; + size_t n; + size_t h = 0; + + for (s = entry->string, n = entry->length; n > 0; s++, n--) + h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9))); + + return h; +} + +/* Perform a fuzzy comparison of two ChangeLog entries. + Return a similarity measure of the two entries, a value between 0 and 1. + 0 stands for very distinct, 1 for identical. */ +static double +entry_fstrcmp (const struct entry *entry1, const struct entry *entry2) +{ + /* fstrcmp works only on NUL terminated strings. */ + char *memory; + double similarity; + + if (memchr (entry1->string, '\0', entry1->length) != NULL) + return 0.0; + if (memchr (entry2->string, '\0', entry2->length) != NULL) + return 0.0; + memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1); + { + char *p = memory; + memcpy (p, entry1->string, entry1->length); + p += entry1->length; + *p++ = '\0'; + memcpy (p, entry2->string, entry2->length); + p += entry2->length; + *p++ = '\0'; + } + similarity = fstrcmp (memory, memory + entry1->length + 1); + freea (memory); + return similarity; +} + +/* This structure represents an entire ChangeLog file, after it was read + into memory. */ +struct changelog_file +{ + /* The entries, as a list. */ + gl_list_t /* */ entries_list; + /* The entries, as a list in opposite direction. */ + gl_list_t /* */ entries_reversed; + /* The entries, as an array. */ + size_t num_entries; + struct entry **entries; +}; + +/* Read a ChangeLog file into memory. + Return the contents in *RESULT. */ +static void +read_changelog_file (const char *filename, struct changelog_file *result) +{ + /* Read the file in text mode, otherwise it's hard to recognize empty + lines. */ + size_t length; + char *contents = read_file (filename, &length); + if (contents == NULL) + { + fprintf (stderr, "could not read file '%s'\n", filename); + exit (EXIT_FAILURE); + } + + result->entries_list = + gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode, + NULL, true); + result->entries_reversed = + gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode, + NULL, true); + /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts + at a line following a blank line and that starts with a non-whitespace + character, or at the beginning of a file. + Split the file contents into entries. */ + { + char *contents_end = contents + length; + char *start = contents; + while (start < contents_end) + { + /* Search the end of the current entry. */ + char *ptr = start; + struct entry *curr; + + while (ptr < contents_end) + { + ptr = memchr (ptr, '\n', contents_end - ptr); + if (ptr == NULL) + { + ptr = contents_end; + break; + } + ptr++; + if (contents_end - ptr >= 2 + && ptr[0] == '\n' + && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' ')) + { + ptr++; + break; + } + } + + curr = XMALLOC (struct entry); + curr->string = start; + curr->length = ptr - start; + gl_list_add_last (result->entries_list, curr); + gl_list_add_first (result->entries_reversed, curr); + + start = ptr; + } + } + + result->num_entries = gl_list_size (result->entries_list); + result->entries = XNMALLOC (result->num_entries, struct entry *); + { + size_t index = 0; + gl_list_iterator_t iter = gl_list_iterator (result->entries_list); + const void *elt; + gl_list_node_t node; + while (gl_list_iterator_next (&iter, &elt, &node)) + result->entries[index++] = (struct entry *) elt; + gl_list_iterator_free (&iter); + ASSERT (index == result->num_entries); + } +} + +/* Compute a mapping (correspondence) between entries of FILE1 and of FILE2. + Return a set of two arrays: + - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry + from FILE1 is not found in FILE2). + - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry + from FILE2 is not found in FILE1). + The correspondence also takes into account small modifications; i.e. the + indicated relation is not equality of entries but best-match similarity + of entries. */ +static void +compute_mapping (struct changelog_file *file1, struct changelog_file *file2, + ssize_t *result[2]) +{ + /* Mapping from indices in file1 to indices in file2. */ + ssize_t *index_mapping; + /* Mapping from indices in file2 to indices in file1. */ + ssize_t *index_mapping_reverse; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + ssize_t i, j; + + index_mapping = XNMALLOC (n1, ssize_t); + for (i = 0; i < n1; i++) + index_mapping[i] = -1; + + index_mapping_reverse = XNMALLOC (n2, ssize_t); + for (j = 0; j < n2; j++) + index_mapping_reverse[j] = -1; + + for (i = n1 - 1; i >= 0; i--) + /* Take an entry from file1. */ + if (index_mapping[i] < 0) + { + struct entry *entry = file1->entries[i]; + /* Search whether it occurs in file2. */ + j = gl_list_indexof (file2->entries_reversed, entry); + if (j >= 0) + { + j = n2 - 1 - j; + /* Found an exact correspondence. */ + ASSERT (index_mapping_reverse[j] < 0); + index_mapping[i] = j; + index_mapping_reverse[j] = i; + /* Look for more occurrences of the same entry. */ + { + ssize_t curr_i = i; + ssize_t curr_j = j; + + for (;;) + { + ssize_t next_i; + ssize_t next_j; + + next_i = + gl_list_indexof_from (file1->entries_reversed, n1 - curr_i, + entry); + if (next_i < 0) + break; + next_j = + gl_list_indexof_from (file2->entries_reversed, n2 - curr_j, + entry); + if (next_j < 0) + break; + curr_i = n1 - 1 - next_i; + curr_j = n2 - 1 - next_j; + ASSERT (index_mapping[curr_i] < 0); + ASSERT (index_mapping_reverse[curr_j] < 0); + index_mapping[curr_i] = curr_j; + index_mapping_reverse[curr_j] = curr_i; + } + } + } + } + + for (i = n1 - 1; i >= 0; i--) + /* Take an entry from file1. */ + if (index_mapping[i] < 0) + { + struct entry *entry_i = file1->entries[i]; + /* Search whether it approximately occurs in file2. */ + ssize_t best_j = -1; + double best_j_similarity = 0.0; + for (j = n2 - 1; j >= 0; j--) + if (index_mapping_reverse[j] < 0) + { + double similarity = entry_fstrcmp (entry_i, file2->entries[j]); + if (similarity > best_j_similarity) + { + best_j = j; + best_j_similarity = similarity; + } + } + if (best_j_similarity >= FSTRCMP_THRESHOLD) + { + /* Found a similar entry in file2. */ + struct entry *entry_j = file2->entries[best_j]; + /* Search whether it approximately occurs in file1 at index i. */ + ssize_t best_i = -1; + double best_i_similarity = 0.0; + ssize_t ii; + for (ii = n1 - 1; ii >= 0; ii--) + if (index_mapping[ii] < 0) + { + double similarity = + entry_fstrcmp (file1->entries[ii], entry_j); + if (similarity > best_i_similarity) + { + best_i = i; + best_i_similarity = similarity; + } + } + if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i) + { + index_mapping[i] = best_j; + index_mapping_reverse[best_j] = i; + } + } + } + + result[0] = index_mapping; + result[1] = index_mapping_reverse; +} + +/* An "edit" is a textual modification performed by the user, that needs to + be applied to the other file. */ +enum edit_type +{ + /* Some consecutive entries were added. */ + ADDITION, + /* Some consecutive entries were removed; some other consecutive entries + were added at the same position. (Not necessarily the same number of + entries.) */ + CHANGE, + /* Some consecutive entries were removed. */ + REMOVAL +}; + +/* This structure represents an edit. */ +struct edit +{ + enum edit_type type; + /* Range of indices into the entries of FILE1. */ + ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */ + /* Range of indices into the entries of FILE2. */ + ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */ +}; + +/* This structure represents the differences from one file, FILE1, to another + file, FILE2. */ +struct differences +{ + /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry + from FILE1 is not found in FILE2). */ + ssize_t *index_mapping; + /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry + from FILE2 is not found in FILE1). */ + ssize_t *index_mapping_reverse; + /* The edits that transform FILE1 into FILE2. */ + size_t num_edits; + struct edit **edits; +}; + +/* Import the difference detection algorithm from GNU diff. */ +#define ELEMENT struct entry * +#define EQUAL entry_equals +#define OFFSET ssize_t +#define EXTRA_CONTEXT_FIELDS \ + ssize_t *index_mapping; \ + ssize_t *index_mapping_reverse; +#define NOTE_DELETE(ctxt, xoff) \ + ctxt->index_mapping[xoff] = -1 +#define NOTE_INSERT(ctxt, yoff) \ + ctxt->index_mapping_reverse[yoff] = -1 +#include "diffseq.h" + +/* Compute the differences between the entries of FILE1 and the entries of + FILE2. */ +static void +compute_differences (struct changelog_file *file1, struct changelog_file *file2, + struct differences *result) +{ + /* Unlike compute_mapping, which mostly ignores the order of the entries and + therefore works well when some entries are permuted, here we use the order. + I think this is needed in order to distinguish changes from + additions+removals; I don't know how to say what is a "change" if the + files are considered as unordered sets of entries. */ + struct context ctxt; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + ssize_t i; + ssize_t j; + gl_list_t /* */ edits; + + ctxt.xvec = file1->entries; + ctxt.yvec = file2->entries; + ctxt.index_mapping = XNMALLOC (n1, ssize_t); + for (i = 0; i < n1; i++) + ctxt.index_mapping[i] = 0; + ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t); + for (j = 0; j < n2; j++) + ctxt.index_mapping_reverse[j] = 0; + ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1; + ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3; + ctxt.too_expensive = n1 + n2; + + /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for + each removed or added entry. */ + compareseq (0, n1, 0, n2, 0, &ctxt); + + /* Complete the index_mapping and index_mapping_reverse arrays. */ + i = 0; + j = 0; + while (i < n1 || j < n2) + { + while (i < n1 && ctxt.index_mapping[i] < 0) + i++; + while (j < n2 && ctxt.index_mapping_reverse[j] < 0) + j++; + ASSERT ((i < n1) == (j < n2)); + if (i == n1 && j == n2) + break; + ctxt.index_mapping[i] = j; + ctxt.index_mapping_reverse[j] = i; + i++; + j++; + } + + /* Create the edits. */ + edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true); + i = 0; + j = 0; + while (i < n1 || j < n2) + { + if (i == n1) + { + struct edit *e; + ASSERT (j < n2); + e = XMALLOC (struct edit); + e->type = ADDITION; + e->j1 = j; + e->j2 = n2 - 1; + gl_list_add_last (edits, e); + break; + } + if (j == n2) + { + struct edit *e; + ASSERT (i < n1); + e = XMALLOC (struct edit); + e->type = REMOVAL; + e->i1 = i; + e->i2 = n1 - 1; + gl_list_add_last (edits, e); + break; + } + if (ctxt.index_mapping[i] >= 0) + { + if (ctxt.index_mapping_reverse[j] >= 0) + { + ASSERT (ctxt.index_mapping[i] == j); + ASSERT (ctxt.index_mapping_reverse[j] == i); + i++; + j++; + } + else + { + struct edit *e; + ASSERT (ctxt.index_mapping_reverse[j] < 0); + e = XMALLOC (struct edit); + e->type = ADDITION; + e->j1 = j; + do + j++; + while (j < n2 && ctxt.index_mapping_reverse[j] < 0); + e->j2 = j - 1; + gl_list_add_last (edits, e); + } + } + else + { + if (ctxt.index_mapping_reverse[j] >= 0) + { + struct edit *e; + ASSERT (ctxt.index_mapping[i] < 0); + e = XMALLOC (struct edit); + e->type = REMOVAL; + e->i1 = i; + do + i++; + while (i < n1 && ctxt.index_mapping[i] < 0); + e->i2 = i - 1; + gl_list_add_last (edits, e); + } + else + { + struct edit *e; + ASSERT (ctxt.index_mapping[i] < 0); + ASSERT (ctxt.index_mapping_reverse[j] < 0); + e = XMALLOC (struct edit); + e->type = CHANGE; + e->i1 = i; + do + i++; + while (i < n1 && ctxt.index_mapping[i] < 0); + e->i2 = i - 1; + e->j1 = j; + do + j++; + while (j < n2 && ctxt.index_mapping_reverse[j] < 0); + e->j2 = j - 1; + gl_list_add_last (edits, e); + } + } + } + + result->index_mapping = ctxt.index_mapping; + result->index_mapping_reverse = ctxt.index_mapping_reverse; + result->num_edits = gl_list_size (edits); + result->edits = XNMALLOC (result->num_edits, struct edit *); + { + size_t index = 0; + gl_list_iterator_t iter = gl_list_iterator (edits); + const void *elt; + gl_list_node_t node; + while (gl_list_iterator_next (&iter, &elt, &node)) + result->edits[index++] = (struct edit *) elt; + gl_list_iterator_free (&iter); + ASSERT (index == result->num_edits); + } +} + +/* An empty entry. */ +static struct entry empty_entry = { NULL, 0 }; + +/* Write the contents of an entry to the output stream FP. */ +static void +entry_write (FILE *fp, struct entry *entry) +{ + if (entry->length > 0) + fwrite (entry->string, 1, entry->length, fp); +} + +/* This structure represents a conflict. + A conflict can occur for various reasons. */ +struct conflict +{ + /* Parts from the ancestor file. */ + size_t num_old_entries; + struct entry **old_entries; + /* Parts of the modified file. */ + size_t num_modified_entries; + struct entry **modified_entries; +}; + +/* Write a conflict to the output stream FP, including markers. */ +static void +conflict_write (FILE *fp, struct conflict *c) +{ + size_t i; + + /* Use the same syntax as git's default merge driver. + Don't indent the contents of the entries (with things like ">" or "-"), + otherwise the user needs more textual editing to resolve the conflict. */ + fputs ("<<<<<<<\n", fp); + for (i = 0; i < c->num_old_entries; i++) + entry_write (fp, c->old_entries[i]); + fputs ("=======\n", fp); + for (i = 0; i < c->num_modified_entries; i++) + entry_write (fp, c->modified_entries[i]); + fputs (">>>>>>>\n", fp); +} + +/* Long options. */ +static const struct option long_options[] = +{ + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } +}; + +/* Print a usage mesage and exit. */ +static void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, "Try `%s --help' for more information.\n", + program_name); + else + { + printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n", + program_name); + printf ("\n"); + printf ("Merges independent modifications of a ChangeLog style file.\n"); + printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n"); + printf ("A-FILE-NAME names the publicly modified file.\n"); + printf ("B-FILE-NAME names the user-modified file.\n"); + printf ("Writes the merged file into A-FILE-NAME.\n"); + printf ("\n"); + printf ("Informative output:\n"); + printf (" -h, --help display this help and exit\n"); + printf (" -V, --version output version information and exit\n"); + printf ("\n"); + fputs ("Report bugs to .\n", + stdout); + } + + exit (status); +} + +int +main (int argc, char *argv[]) +{ + int optchar; + bool do_help; + bool do_version; + + /* Set program name for messages. */ + set_program_name (argv[0]); + + /* Set default values for variables. */ + do_help = false; + do_version = false; + + /* Parse command line options. */ + while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF) + switch (optchar) + { + case '\0': /* Long option. */ + break; + case 'h': + do_help = true; + break; + case 'V': + do_version = true; + break; + default: + usage (EXIT_FAILURE); + } + + if (do_version) + { + /* Version information is requested. */ + printf ("%s\n", program_name); + printf ("Copyright (C) %s Free Software Foundation, Inc.\n\ +License GPLv2+: GNU GPL version 2 or later \n\ +This is free software: you are free to change and redistribute it.\n\ +There is NO WARRANTY, to the extent permitted by law.\n\ +", + "2008"); + printf ("Written by %s.\n", "Bruno Haible"); + exit (EXIT_SUCCESS); + } + + if (do_help) + { + /* Help is requested. */ + usage (EXIT_SUCCESS); + } + + /* Test argument count. */ + if (optind + 3 != argc) + error (EXIT_FAILURE, 0, "expected three arguments"); + + { + const char *ancestor_file_name; /* O-FILE-NAME */ + const char *destination_file_name; /* A-FILE-NAME */ + bool downstream; + const char *other_file_name; /* B-FILE-NAME */ + const char *mainstream_file_name; + const char *modified_file_name; + struct changelog_file ancestor_file; + struct changelog_file mainstream_file; + struct changelog_file modified_file; + /* Mapping from indices in ancestor_file to indices in mainstream_file. */ + ssize_t *index_mapping; + /* Mapping from indices in mainstream_file to indices in ancestor_file. */ + ssize_t *index_mapping_reverse; + struct differences diffs; + gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */ + gl_list_t /* */ result_entries; + gl_list_t /* */ result_conflicts; + + ancestor_file_name = argv[optind]; + destination_file_name = argv[optind + 1]; + other_file_name = argv[optind + 2]; + + /* Heuristic to determine whether it's a pull in downstream direction + (e.g. pull from a centralized server) or a pull in upstream direction + (e.g. "git stash apply"). + + For ChangeLog this distinction is important. The difference between + an "upstream" and a "downstream" repository is that more people are + looking at the "upstream" repository. They want to be informed about + changes and expect them to be shown at the top of the ChangeLog. + When a user pulls downstream, on the other hand, he has two options: + a) He gets the change entries from the central repository also at the + top of his ChangeLog, and his own changes come after them. + b) He gets the change entries from the central repository after those + he has collected for his branch. His own change entries stay at + the top of the ChangeLog file. + In the case a) he has to reorder the ChangeLog before he can commit. + No one does that. So most people want b). + In other words, the order of entries in a ChangeLog should represent + the order in which they have flown (or will flow) into the *central* + repository. + + But in git this is fundamentally indistinguishable, because when Linus + pulls patches from akpm and akpm pulls patches from Linus, it's not + clear which of the two is more "upstream". Also, when you have many + branches in a repository and pull from one to another, "git" has no way + to know which branch is more "upstream" than the other. The git-tag(1) + manual page also says: + "One important aspect of git is it is distributed, and being + distributed largely means there is no inherent "upstream" or + "downstream" in the system." + Therefore anyone who attempts to produce a ChangeLog from the merge + history will fail. + + Here we allow the user to specify the pull direction through an + environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two + environment variables are not set, we assume a "simple single user" + usage pattern: He manages local changes through stashes and uses + "git pull" only to pull downstream. + + How to distinguish these situation? There are several hints: + - During a "git stash apply", GIT_REFLOG_ACTION is not set. During + a "git pull", it is set to 'pull'. + - During a "git stash apply", there is an environment variable of + the form GITHEAD_<40_hex_digits>='Stashed changes'. */ + { + const char *var; + + var = getenv ("GIT_DOWNSTREAM"); + if (var != NULL && var[0] != '\0') + downstream = true; + else + { + var = getenv ("GIT_UPSTREAM"); + if (var != NULL && var[0] != '\0') + downstream = false; + else + { + var = getenv ("GIT_REFLOG_ACTION"); + #if 0 /* Debugging code */ + printf ("GIT_REFLOG_ACTION=|%s|\n", var); + #endif + if (var != NULL + && (strncmp (var, "pull", 4) == 0 + || strncmp (var, "merge origin", 12) == 0)) + downstream = true; + else + { + /* "git stash apply", "git rebase" and similar. */ + downstream = false; + } + } + } + } + + #if 0 /* Debugging code */ + { + char buf[1000]; + printf ("First line of %%A:\n"); + sprintf (buf, "head -1 %s", destination_file_name); system (buf); + printf ("First line of %%B:\n"); + sprintf (buf, "head -1 %s", other_file_name); system (buf); + printf ("Guessing direction: %sstream\n", downstream ? "down" : "up"); + } + #endif + + if (downstream) + { + mainstream_file_name = other_file_name; + modified_file_name = destination_file_name; + } + else + { + mainstream_file_name = destination_file_name; + modified_file_name = other_file_name; + } + + /* Read the three files into memory. */ + read_changelog_file (ancestor_file_name, &ancestor_file); + read_changelog_file (mainstream_file_name, &mainstream_file); + read_changelog_file (modified_file_name, &modified_file); + + /* Compute correspondence between the entries of ancestor_file and of + mainstream_file. */ + { + ssize_t *result[2]; + compute_mapping (&ancestor_file, &mainstream_file, result); + index_mapping = result[0]; + index_mapping_reverse = result[1]; + } + + /* Compute differences between the entries of ancestor_file and of + modified_file. */ + compute_differences (&ancestor_file, &modified_file, &diffs); + + /* Compute the result. */ + result_entries_pointers = + XNMALLOC (mainstream_file.num_entries, gl_list_node_t); + result_entries = + gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode, + NULL, true); + { + size_t k; + for (k = 0; k < mainstream_file.num_entries; k++) + result_entries_pointers[k] = + gl_list_add_last (result_entries, mainstream_file.entries[k]); + } + result_conflicts = + gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true); + { + size_t e; + for (e = 0; e < diffs.num_edits; e++) + { + struct edit *edit = diffs.edits[e]; + switch (edit->type) + { + case ADDITION: + if (edit->j1 == 0) + { + /* An addition to the top of modified_file. + Apply it to the top of mainstream_file. */ + ssize_t j; + for (j = edit->j2; j >= edit->j1; j--) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_first (result_entries, added_entry); + } + } + else + { + ssize_t i_before; + ssize_t i_after; + ssize_t k_before; + ssize_t k_after; + i_before = diffs.index_mapping_reverse[edit->j1 - 1]; + ASSERT (i_before >= 0); + i_after = (edit->j2 + 1 == modified_file.num_entries + ? ancestor_file.num_entries + : diffs.index_mapping_reverse[edit->j2 + 1]); + ASSERT (i_after >= 0); + ASSERT (i_after == i_before + 1); + /* An addition between ancestor_file.entries[i_before] and + ancestor_file.entries[i_after]. See whether these two + entries still exist in mainstream_file and are still + consecutive. */ + k_before = index_mapping[i_before]; + k_after = (i_after == ancestor_file.num_entries + ? mainstream_file.num_entries + : index_mapping[i_after]); + if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1) + { + /* Yes, the entry before and after are still neighbours + in mainstream_file. Apply the addition between + them. */ + if (k_after == mainstream_file.num_entries) + { + size_t j; + for (j = edit->j1; j <= edit->j2; j++) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_last (result_entries, added_entry); + } + } + else + { + gl_list_node_t node_k_after = result_entries_pointers[k_after]; + size_t j; + for (j = edit->j1; j <= edit->j2; j++) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_before (result_entries, node_k_after, added_entry); + } + } + } + else + { + /* It's not clear where the additions should be applied. + Let the user decide. */ + struct conflict *c = XMALLOC (struct conflict); + size_t j; + c->num_old_entries = 0; + c->old_entries = NULL; + c->num_modified_entries = edit->j2 - edit->j1 + 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + for (j = edit->j1; j <= edit->j2; j++) + c->modified_entries[j - edit->j1] = modified_file.entries[j]; + gl_list_add_last (result_conflicts, c); + } + } + break; + case REMOVAL: + { + /* Apply the removals one by one. */ + size_t i; + for (i = edit->i1; i <= edit->i2; i++) + { + struct entry *removed_entry = ancestor_file.entries[i]; + ssize_t k = index_mapping[i]; + if (k >= 0 + && entry_equals (removed_entry, + mainstream_file.entries[k])) + { + /* The entry to be removed still exists in + mainstream_file. Remove it. */ + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + &empty_entry); + } + else + { + /* The entry to be removed was already removed or was + modified. This is a conflict. */ + struct conflict *c = XMALLOC (struct conflict); + c->num_old_entries = 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + c->old_entries[0] = removed_entry; + c->num_modified_entries = 0; + c->modified_entries = NULL; + gl_list_add_last (result_conflicts, c); + } + } + } + break; + case CHANGE: + { + bool simple; + bool done; + /* Test whether the change is "simple", i.e. whether it + consists of small changes to the old ChangeLog entries + and additions before them: + entry_1 ... entry_n + are mapped to + added_entry ... added_entry modified_entry_1 ... modified_entry_n. */ + if (edit->i2 - edit->i1 <= edit->j2 - edit->j1) + { + size_t i; + simple = true; + for (i = edit->i1; i <= edit->i2; i++) + if (entry_fstrcmp (ancestor_file.entries[i], + modified_file.entries[i + edit->j2 - edit->i2]) + < FSTRCMP_THRESHOLD) + { + simple = false; + break; + } + } + else + simple = false; + done = false; + if (simple) + { + /* Apply the additions and each of the single-entry changes + separately. */ + size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */ + size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed; + if (edit->j1 == 0) + { + /* A simple change at the top of modified_file. + Apply it to the top of mainstream_file. */ + ssize_t j; + for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_first (result_entries, added_entry); + } + for (j = edit->j1 + num_added; j <= edit->j2; j++) + { + struct entry *changed_entry = modified_file.entries[j]; + size_t i = j + edit->i2 - edit->j2; + ssize_t k = index_mapping[i]; + if (k >= 0 + && entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])) + { + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + changed_entry); + } + else + { + struct conflict *c = XMALLOC (struct conflict); + c->num_old_entries = 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + c->old_entries[0] = ancestor_file.entries[i]; + c->num_modified_entries = 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + c->modified_entries[0] = changed_entry; + gl_list_add_last (result_conflicts, c); + } + } + done = true; + } + else + { + ssize_t i_before; + ssize_t k_before; + bool linear; + i_before = diffs.index_mapping_reverse[edit->j1 - 1]; + ASSERT (i_before >= 0); + /* A simple change after ancestor_file.entries[i_before]. + See whether this entry and the following num_changed + entries still exist in mainstream_file and are still + consecutive. */ + k_before = index_mapping[i_before]; + linear = (k_before >= 0); + if (linear) + { + size_t i; + for (i = i_before + 1; i <= i_before + num_changed; i++) + if (index_mapping[i] != k_before + (i - i_before)) + { + linear = false; + break; + } + } + if (linear) + { + gl_list_node_t node_for_insert = + result_entries_pointers[k_before + 1]; + ssize_t j; + for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_before (result_entries, node_for_insert, added_entry); + } + for (j = edit->j1 + num_added; j <= edit->j2; j++) + { + struct entry *changed_entry = modified_file.entries[j]; + size_t i = j + edit->i2 - edit->j2; + ssize_t k = index_mapping[i]; + ASSERT (k >= 0); + if (entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])) + { + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + changed_entry); + } + else + { + struct conflict *c = XMALLOC (struct conflict); + c->num_old_entries = 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + c->old_entries[0] = ancestor_file.entries[i]; + c->num_modified_entries = 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + c->modified_entries[0] = changed_entry; + gl_list_add_last (result_conflicts, c); + } + } + done = true; + } + } + } + else + { + /* A big change. + See whether the num_changed entries still exist unchanged + in mainstream_file and are still consecutive. */ + ssize_t i_first; + ssize_t k_first; + bool linear_unchanged; + i_first = edit->i1; + k_first = index_mapping[i_first]; + linear_unchanged = + (k_first >= 0 + && entry_equals (ancestor_file.entries[i_first], + mainstream_file.entries[k_first])); + if (linear_unchanged) + { + size_t i; + for (i = i_first + 1; i <= edit->i2; i++) + if (!(index_mapping[i] == k_first + (i - i_first) + && entry_equals (ancestor_file.entries[i], + mainstream_file.entries[index_mapping[i]]))) + { + linear_unchanged = false; + break; + } + } + if (linear_unchanged) + { + gl_list_node_t node_for_insert = + result_entries_pointers[k_first]; + ssize_t j; + size_t i; + for (j = edit->j2; j >= edit->j1; j--) + { + struct entry *new_entry = modified_file.entries[j]; + gl_list_add_before (result_entries, node_for_insert, new_entry); + } + for (i = edit->i1; i <= edit->i2; i++) + { + ssize_t k = index_mapping[i]; + ASSERT (k >= 0); + ASSERT (entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])); + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + &empty_entry); + } + done = true; + } + } + if (!done) + { + struct conflict *c = XMALLOC (struct conflict); + size_t i, j; + c->num_old_entries = edit->i2 - edit->i1 + 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + for (i = edit->i1; i <= edit->i2; i++) + c->old_entries[i - edit->i1] = ancestor_file.entries[i]; + c->num_modified_entries = edit->j2 - edit->j1 + 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + for (j = edit->j1; j <= edit->j2; j++) + c->modified_entries[j - edit->j1] = modified_file.entries[j]; + gl_list_add_last (result_conflicts, c); + } + } + break; + } + } + } + + /* Output the result. */ + { + FILE *fp = fopen (destination_file_name, "w"); + if (fp == NULL) + { + fprintf (stderr, "could not write file '%s'\n", destination_file_name); + exit (EXIT_FAILURE); + } + + /* Output the conflicts at the top. */ + { + size_t n = gl_list_size (result_conflicts); + size_t i; + for (i = 0; i < n; i++) + conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i)); + } + { + size_t n = gl_list_size (result_entries); + size_t i; + for (i = 0; i < n; i++) + entry_write (fp, (struct entry *) gl_list_get_at (result_entries, i)); + } + + if (fwriteerror (fp)) + { + fprintf (stderr, "error writing to file '%s'\n", destination_file_name); + exit (EXIT_FAILURE); + } + } + + exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS); + } +} diff --git a/modules/git-merge-changelog b/modules/git-merge-changelog new file mode 100644 index 000000000..fdfebfd76 --- /dev/null +++ b/modules/git-merge-changelog @@ -0,0 +1,36 @@ +Description: +git "merge" driver for GNU style ChangeLog files + +Files: +lib/git-merge-changelog.c + +Depends-on: +getopt +stdbool +progname +error +read-file +list +array-list +linkedhash-list +linked-list +xalloc +xmalloca +fstrcmp +minmax +fwriteerror + +configure.ac: + +Makefile.am: +bin_PROGRAMS = git-merge-changelog +git_merge_changelog_LDADD = -L. -lgnu + +Include: + +License: +GPL + +Maintainer: +Bruno Haible + -- 2.11.0