bugfix: Process escaped colon in GCC depfiles.  (#1774)

* Added the ability to parse escaped colons in GCC depfiles, enabling ninja to parse depfiles produced by GCC 10 on Windows

* Added generated depfile_parser.cc

* Addressed formatting

* Added extra tests with real-world examples of paths produced by GCC 10, by Clang, and by GCC versions before 10. Adjusted one test so it doesn't fail

* Adjusted regular expression to not match \: if the character following the : is either EOF or whitespace

* Fixed typo in regex (should be 0x20 for space not 0xa)

* Changed the regular expression from using lookahead to instead matching a separate expression. This was needed because re2c before version 1.17 is broken when using lookaheads. Also added tests for \: followed by whitespace

* Addressed formatting

* Added a missing std::

* Fixed formatting: added spaces after commas and respected the column width
diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc
index 90d4a8a..0b7dce1 100644
--- a/src/depfile_parser.cc
+++ b/src/depfile_parser.cc
@@ -1,4 +1,4 @@
-/* Generated by re2c 1.1.1 */
+/* Generated by re2c 1.3 */
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -166,22 +166,23 @@
       goto yy5;
 yy13:
       yych = *(yymarker = ++in);
-      if (yych <= 0x1F) {
+      if (yych <= ' ') {
         if (yych <= '\n') {
           if (yych <= 0x00) goto yy5;
           if (yych <= '\t') goto yy16;
           goto yy17;
         } else {
           if (yych == '\r') goto yy19;
-          goto yy16;
+          if (yych <= 0x1F) goto yy16;
+          goto yy21;
         }
       } else {
-        if (yych <= '#') {
-          if (yych <= ' ') goto yy21;
-          if (yych <= '"') goto yy16;
-          goto yy23;
+        if (yych <= '9') {
+          if (yych == '#') goto yy23;
+          goto yy16;
         } else {
-          if (yych == '\\') goto yy25;
+          if (yych <= ':') goto yy25;
+          if (yych == '\\') goto yy27;
           goto yy16;
         }
       }
@@ -231,26 +232,63 @@
       }
 yy25:
       yych = *++in;
-      if (yych <= 0x1F) {
+      if (yych <= '\f') {
+        if (yych <= 0x00) goto yy28;
+        if (yych <= 0x08) goto yy26;
+        if (yych <= '\n') goto yy28;
+      } else {
+        if (yych <= '\r') goto yy28;
+        if (yych == ' ') goto yy28;
+      }
+yy26:
+      {
+        // De-escape colon sign, but preserve other leading backslashes.
+        // A separate rule matches the case where whitespace or EOF follows
+        // the colon; in that case it'd be the : at the end of a target
+        int len = (int)(in - start);
+        if (len > 2 && out < start)
+          memset(out, '\\', len - 2);
+        out += len - 2;
+        *out++ = ':';
+        continue;
+      }
+yy27:
+      yych = *++in;
+      if (yych <= ' ') {
         if (yych <= '\n') {
           if (yych <= 0x00) goto yy11;
           if (yych <= '\t') goto yy16;
           goto yy11;
         } else {
           if (yych == '\r') goto yy11;
-          goto yy16;
+          if (yych <= 0x1F) goto yy16;
+          goto yy30;
         }
       } else {
-        if (yych <= '#') {
-          if (yych <= ' ') goto yy26;
-          if (yych <= '"') goto yy16;
-          goto yy23;
+        if (yych <= '9') {
+          if (yych == '#') goto yy23;
+          goto yy16;
         } else {
-          if (yych == '\\') goto yy28;
+          if (yych <= ':') goto yy25;
+          if (yych == '\\') goto yy32;
           goto yy16;
         }
       }
-yy26:
+yy28:
+      ++in;
+      {
+        // Backslash followed by : and whitespace.
+        // It is therefore normal text and not an escaped colon
+        int len = (int)(in - start - 1);
+        // Need to shift it over if we're overwriting backslashes.
+        if (out < start)
+          memmove(out, start, len);
+        out += len;
+        if (*(in - 1) == '\n')
+          have_newline = true;
+        break;
+      }
+yy30:
       ++in;
       {
         // 2N backslashes plus space -> 2N backslashes, end of filename.
@@ -260,24 +298,25 @@
         out += len - 1;
         break;
       }
-yy28:
+yy32:
       yych = *++in;
-      if (yych <= 0x1F) {
+      if (yych <= ' ') {
         if (yych <= '\n') {
           if (yych <= 0x00) goto yy11;
           if (yych <= '\t') goto yy16;
           goto yy11;
         } else {
           if (yych == '\r') goto yy11;
-          goto yy16;
+          if (yych <= 0x1F) goto yy16;
+          goto yy21;
         }
       } else {
-        if (yych <= '#') {
-          if (yych <= ' ') goto yy21;
-          if (yych <= '"') goto yy16;
-          goto yy23;
+        if (yych <= '9') {
+          if (yych == '#') goto yy23;
+          goto yy16;
         } else {
-          if (yych == '\\') goto yy25;
+          if (yych <= ':') goto yy25;
+          if (yych == '\\') goto yy27;
           goto yy16;
         }
       }
diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc
index b32b942..95b4346 100644
--- a/src/depfile_parser.in.cc
+++ b/src/depfile_parser.in.cc
@@ -103,6 +103,29 @@
         *out++ = '#';
         continue;
       }
+      '\\'+ ':' [\x00\x20\r\n\t] {
+        // Backslash followed by : and whitespace.
+        // It is therefore normal text and not an escaped colon
+        int len = (int)(in - start - 1);
+        // Need to shift it over if we're overwriting backslashes.
+        if (out < start)
+          memmove(out, start, len);
+        out += len;
+        if (*(in - 1) == '\n')
+          have_newline = true;
+        break;
+      }
+      '\\'+ ':' {
+        // De-escape colon sign, but preserve other leading backslashes.
+        // A separate rule matches the case where whitespace or EOF follows
+        // the colon; in that case it'd be the : at the end of a target
+        int len = (int)(in - start);
+        if (len > 2 && out < start)
+          memset(out, '\\', len - 2);
+        out += len - 2;
+        *out++ = ':';
+        continue;
+      }
       '$$' {
         // De-escape dollar character.
         *out++ = '$';
diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc
index bf1a0bc..8e2cd25 100644
--- a/src/depfile_parser_test.cc
+++ b/src/depfile_parser_test.cc
@@ -142,6 +142,41 @@
   ASSERT_EQ(0u, parser_.ins_.size());
 }
 
+TEST_F(DepfileParserTest, EscapedColons)
+{
+  std::string err;
+  // Tests for correct parsing of depfiles produced on Windows
+  // by Clang, GCC pre 10 and GCC 10
+  EXPECT_TRUE(Parse(
+"c\\:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o: \\\n"
+" c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h \n",
+      &err));
+  ASSERT_EQ("", err);
+  ASSERT_EQ(1u, parser_.outs_.size());
+  EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o",
+            parser_.outs_[0].AsString());
+  ASSERT_EQ(1u, parser_.ins_.size());
+  EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h",
+            parser_.ins_[0].AsString());
+}
+
+TEST_F(DepfileParserTest, EscapedTargetColon)
+{
+  std::string err;
+  EXPECT_TRUE(Parse(
+"foo1\\: x\n"
+"foo1\\:\n"
+"foo1\\:\r\n"
+"foo1\\:\t\n"
+"foo1\\:",
+      &err));
+  ASSERT_EQ("", err);
+  ASSERT_EQ(1u, parser_.outs_.size());
+  EXPECT_EQ("foo1\\", parser_.outs_[0].AsString());
+  ASSERT_EQ(1u, parser_.ins_.size());
+  EXPECT_EQ("x", parser_.ins_[0].AsString());
+}
+
 TEST_F(DepfileParserTest, SpecialChars) {
   // See filenames like istreambuf.iterator_op!= in
   // https://github.com/google/libcxx/tree/master/test/iterators/stream.iterators/istreambuf.iterator/