Follow GCC/Clang behavior wrt depfiles

The option is called "depfile = gcc" and should support depfiles created
by GCC. GCC does not escape backslashes and GNU Make does not try to
unescape them, so Ninja should not try to "unescape" them either.

Only space (' ') and hash sign ('#') are specially treated by GCC/Clang.
Note that while GCC also treats tabs specially, Clang does not, so tabs
are not special-cased here (why would someone use tabs in a filename?).
Support for 2N trailing backslashes in a filename is a bit questionable,
but is added to be as consistent as possible with GCC/Clang.

See also
https://github.com/llvm-mirror/clang/blob/44c160f916a1b080098b17b466b026aa07475ec2/lib/Frontend/DependencyFile.cpp#L316
https://github.com/gcc-mirror/gcc/blob/22a8377023d59cc01ab0a84a1df56d0e1336efa3/libcpp/mkdeps.c#L47

Fixes https://github.com/ninja-build/ninja/issues/1262
diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc
index 405289f..6faeac6 100644
--- a/src/depfile_parser.cc
+++ b/src/depfile_parser.cc
@@ -30,9 +30,15 @@
 // How do you end a line with a backslash?  The netbsd Make docs suggest
 // reading the result of a shell command echoing a backslash!
 //
-// Rather than implement all of above, we do a simpler thing here:
-// Backslashes escape a set of characters (see "escapes" defined below),
-// otherwise they are passed through verbatim.
+// Rather than implement all of above, we follow what GCC/Clang produces:
+// Backslashes escape a space or hash sign.
+// When a space is preceded by 2N+1 backslashes, it represents N backslashes
+// followed by space.
+// When a space is preceded by 2N backslashes, it represents 2N backslashes at
+// the end of a filename.
+// A hash sign is escaped by a single backslash. All other backslashes remain
+// unchanged.
+//
 // If anyone actually has depfiles that rely on the more complicated
 // behavior we can adjust this.
 bool DepfileParser::Parse(string* content, string* err) {
@@ -72,7 +78,7 @@
         128, 128, 128, 128, 128, 128, 128, 128, 
         128, 128, 128, 128, 128, 128, 128, 128, 
         128, 128, 128, 128, 128, 128, 128, 128, 
-        128, 128, 128,   0,   0,   0,   0, 128, 
+        128, 128, 128, 128,   0, 128,   0, 128, 
           0, 128, 128, 128, 128, 128, 128, 128, 
         128, 128, 128, 128, 128, 128, 128, 128, 
         128, 128, 128, 128, 128, 128, 128, 128, 
@@ -111,7 +117,8 @@
           if (yych <= '#') goto yy4;
           goto yy12;
         } else {
-          if (yych == '\\') goto yy13;
+          if (yych <= '?') goto yy4;
+          if (yych <= '\\') goto yy13;
           goto yy4;
         }
       }
@@ -143,6 +150,7 @@
       if (yybm[0+yych] & 128) {
         goto yy9;
       }
+yy11:
       {
         // Got a span of plain text.
         int len = (int)(in - start);
@@ -158,24 +166,22 @@
       goto yy5;
 yy13:
       yych = *(yymarker = ++in);
-      if (yych <= '"') {
-        if (yych <= '\f') {
+      if (yych <= 0x1F) {
+        if (yych <= '\n') {
           if (yych <= 0x00) goto yy5;
-          if (yych == '\n') goto yy18;
-          goto yy16;
+          if (yych <= '\t') goto yy16;
+          goto yy17;
         } else {
-          if (yych <= '\r') goto yy20;
-          if (yych == ' ') goto yy22;
+          if (yych == '\r') goto yy19;
           goto yy16;
         }
       } else {
-        if (yych <= 'Z') {
-          if (yych <= '#') goto yy22;
-          if (yych == '*') goto yy22;
-          goto yy16;
+        if (yych <= '#') {
+          if (yych <= ' ') goto yy21;
+          if (yych <= '"') goto yy16;
+          goto yy23;
         } else {
-          if (yych <= ']') goto yy22;
-          if (yych == '|') goto yy22;
+          if (yych == '\\') goto yy25;
           goto yy16;
         }
       }
@@ -188,30 +194,93 @@
       }
 yy16:
       ++in;
-      {
-        // Let backslash before other characters through verbatim.
-        *out++ = '\\';
-        *out++ = yych;
-        continue;
-      }
-yy18:
+      goto yy11;
+yy17:
       ++in;
       {
         // A line continuation ends the current file name.
         break;
       }
-yy20:
+yy19:
       yych = *++in;
-      if (yych == '\n') goto yy18;
+      if (yych == '\n') goto yy17;
       in = yymarker;
       goto yy5;
-yy22:
+yy21:
       ++in;
       {
-        // De-escape backslashed character.
-        *out++ = yych;
+        // 2N+1 backslashes plus space -> N backslashes plus space.
+        int len = (int)(in - start);
+        int n = len / 2 - 1;
+        if (out < start)
+          memset(out, '\\', n);
+        out += n;
+        *out++ = ' ';
         continue;
       }
+yy23:
+      ++in;
+      {
+        // De-escape hash sign, but preserve other leading backslashes.
+        int len = (int)(in - start);
+        if (len > 2 && out < start)
+          memset(out, '\\', len - 2);
+        out += len - 2;
+        *out++ = '#';
+        continue;
+      }
+yy25:
+      yych = *++in;
+      if (yych <= 0x1F) {
+        if (yych <= '\n') {
+          if (yych <= 0x00) goto yy11;
+          if (yych <= '\t') goto yy16;
+          goto yy11;
+        } else {
+          if (yych == '\r') goto yy11;
+          goto yy16;
+        }
+      } else {
+        if (yych <= '#') {
+          if (yych <= ' ') goto yy26;
+          if (yych <= '"') goto yy16;
+          goto yy23;
+        } else {
+          if (yych == '\\') goto yy28;
+          goto yy16;
+        }
+      }
+yy26:
+      ++in;
+      {
+        // 2N backslashes plus space -> 2N backslashes, end of filename.
+        int len = (int)(in - start);
+        if (out < start)
+          memset(out, '\\', len - 1);
+        out += len - 1;
+        break;
+      }
+yy28:
+      yych = *++in;
+      if (yych <= 0x1F) {
+        if (yych <= '\n') {
+          if (yych <= 0x00) goto yy11;
+          if (yych <= '\t') goto yy16;
+          goto yy11;
+        } else {
+          if (yych == '\r') goto yy11;
+          goto yy16;
+        }
+      } else {
+        if (yych <= '#') {
+          if (yych <= ' ') goto yy21;
+          if (yych <= '"') goto yy16;
+          goto yy23;
+        } else {
+          if (yych == '\\') goto yy25;
+          goto yy16;
+        }
+      }
     }
 
     }
diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc
index f8c94b3..735a0c3 100644
--- a/src/depfile_parser.in.cc
+++ b/src/depfile_parser.in.cc
@@ -29,9 +29,15 @@
 // How do you end a line with a backslash?  The netbsd Make docs suggest
 // reading the result of a shell command echoing a backslash!
 //
-// Rather than implement all of above, we do a simpler thing here:
-// Backslashes escape a set of characters (see "escapes" defined below),
-// otherwise they are passed through verbatim.
+// Rather than implement all of above, we follow what GCC/Clang produces:
+// Backslashes escape a space or hash sign.
+// When a space is preceded by 2N+1 backslashes, it represents N backslashes
+// followed by space.
+// When a space is preceded by 2N backslashes, it represents 2N backslashes at
+// the end of a filename.
+// A hash sign is escaped by a single backslash. All other backslashes remain
+// unchanged.
+//
 // If anyone actually has depfiles that rely on the more complicated
 // behavior we can adjust this.
 bool DepfileParser::Parse(string* content, string* err) {
@@ -68,12 +74,33 @@
       re2c:indent:string = "  ";
 
       nul = "\000";
-      escape = [ \\#*[|\]];
       newline = '\r'?'\n';
 
-      '\\' escape {
-        // De-escape backslashed character.
-        *out++ = yych;
+      '\\\\'* '\\ ' {
+        // 2N+1 backslashes plus space -> N backslashes plus space.
+        int len = (int)(in - start);
+        int n = len / 2 - 1;
+        if (out < start)
+          memset(out, '\\', n);
+        out += n;
+        *out++ = ' ';
+        continue;
+      }
+      '\\\\'+ ' ' {
+        // 2N backslashes plus space -> 2N backslashes, end of filename.
+        int len = (int)(in - start);
+        if (out < start)
+          memset(out, '\\', len - 1);
+        out += len - 1;
+        break;
+      }
+      '\\'+ '#' {
+        // De-escape hash sign, but preserve other leading backslashes.
+        int len = (int)(in - start);
+        if (len > 2 && out < start)
+          memset(out, '\\', len - 2);
+        out += len - 2;
+        *out++ = '#';
         continue;
       }
       '$$' {
@@ -81,13 +108,7 @@
         *out++ = '$';
         continue;
       }
-      '\\' [^\000\r\n] {
-        // Let backslash before other characters through verbatim.
-        *out++ = '\\';
-        *out++ = yych;
-        continue;
-      }
-      [a-zA-Z0-9+,/_:.~()}{%@=!\x80-\xFF-]+ {
+      '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
         // Got a span of plain text.
         int len = (int)(in - start);
         // Need to shift it over if we're overwriting backslashes.
diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc
index 52fe7cd..19224f3 100644
--- a/src/depfile_parser_test.cc
+++ b/src/depfile_parser_test.cc
@@ -101,15 +101,36 @@
             parser_.ins_[2].AsString());
 }
 
+TEST_F(DepfileParserTest, MultipleBackslashes) {
+  // Successive 2N+1 backslashes followed by space (' ') are replaced by N >= 0
+  // backslashes and the space. A single backslash before hash sign is removed.
+  // Other backslashes remain untouched (including 2N backslashes followed by
+  // space).
+  string err;
+  EXPECT_TRUE(Parse(
+"a\\ b\\#c.h: \\\\\\\\\\  \\\\\\\\ \\\\share\\info\\\\#1",
+      &err));
+  ASSERT_EQ("", err);
+  EXPECT_EQ("a b#c.h",
+            parser_.out_.AsString());
+  ASSERT_EQ(3u, parser_.ins_.size());
+  EXPECT_EQ("\\\\ ",
+            parser_.ins_[0].AsString());
+  EXPECT_EQ("\\\\\\\\",
+            parser_.ins_[1].AsString());
+  EXPECT_EQ("\\\\share\\info\\#1",
+            parser_.ins_[2].AsString());
+}
+
 TEST_F(DepfileParserTest, Escapes) {
   // Put backslashes before a variety of characters, see which ones make
   // it through.
   string err;
   EXPECT_TRUE(Parse(
-"\\!\\@\\#$$\\%\\^\\&\\\\:",
+"\\!\\@\\#$$\\%\\^\\&\\[\\]\\\\:",
       &err));
   ASSERT_EQ("", err);
-  EXPECT_EQ("\\!\\@#$\\%\\^\\&\\",
+  EXPECT_EQ("\\!\\@#$\\%\\^\\&\\[\\]\\\\",
             parser_.out_.AsString());
   ASSERT_EQ(0u, parser_.ins_.size());
 }
@@ -123,7 +144,7 @@
 " en@quot.header~ t+t-x!=1 \\\n"
 " openldap/slapd.d/cn=config/cn=schema/cn={0}core.ldif\\\n"
 " Fu\303\244ball\\\n"
-" a\\[1\\]b@2%c",
+" a[1]b@2%c",
       &err));
   ASSERT_EQ("", err);
   EXPECT_EQ("C:/Program Files (x86)/Microsoft crtdefs.h",