[doc_checker] Refactor link verification to be more robust.

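All links used to be requested concurrently, which caused a few servers to answer with HTTP 429 (Too Many Requests). Links are now grouped by domain and verified one at a time within each domain, retrying when a server answers 429.
Note that this makes the tool significantly slower (~2 minutes). There are ways this can be sped up in the future.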

Test: manually ran the tool and verified that it completed.
Change-Id: I8fe611f7fe157f6689937c218b8e0d723ae0033a
diff --git a/tools/doc_checker/BUILD.gn b/tools/doc_checker/BUILD.gn
index ad7790b..30c3713 100644
--- a/tools/doc_checker/BUILD.gn
+++ b/tools/doc_checker/BUILD.gn
@@ -12,6 +12,7 @@
   sources = [
     "graph.dart",
     "link_scraper.dart",
+    "link_verifier.dart",
     "projects.dart",
   ]
 
diff --git a/tools/doc_checker/bin/main.dart b/tools/doc_checker/bin/main.dart
index ae944b4..540a1a2 100644
--- a/tools/doc_checker/bin/main.dart
+++ b/tools/doc_checker/bin/main.dart
@@ -6,11 +6,11 @@
 import 'dart:io';
 
 import 'package:args/args.dart';
-import 'package:http/http.dart' as http;
 import 'package:path/path.dart' as path;
 
 import 'package:doc_checker/graph.dart';
 import 'package:doc_checker/link_scraper.dart';
+import 'package:doc_checker/link_verifier.dart';
 import 'package:doc_checker/projects.dart';
 
 const String _optionHelp = 'help';
@@ -66,14 +66,6 @@
   bool get hasLocation => location != null;
 }
 
-Future<bool> isLinkValid(Uri link) async {
-  try {
-    return (await http.get(link)).statusCode == 200;
-  } on IOException {
-    return false;
-  }
-}
-
 Future<Null> main(List<String> args) async {
   final ArgParser parser = new ArgParser()
     ..addFlag(
@@ -108,17 +100,17 @@
   final List<String> docs = new Directory(docsDir)
       .listSync(recursive: true)
       .where((FileSystemEntity entity) =>
-        path.extension(entity.path) == '.md' &&
-        // Skip these files created by macOS since they're not real Markdown:
-        // https://apple.stackexchange.com/q/14980
-        !path.basename(entity.path).startsWith('._'))
+          path.extension(entity.path) == '.md' &&
+          // Skip these files created by macOS since they're not real Markdown:
+          // https://apple.stackexchange.com/q/14980
+          !path.basename(entity.path).startsWith('._'))
       .map((FileSystemEntity entity) => entity.path)
       .toList();
 
   final String readme = path.join(docsDir, 'README.md');
   final Graph graph = new Graph();
   final List<Error> errors = <Error>[];
-  final List<Future<Error>> pendingErrors = <Future<Error>>[];
+  final List<Link<String>> linksToVerify = [];
 
   for (String doc in docs) {
     final String label = path.relative(doc, from: docsDir);
@@ -151,12 +143,7 @@
             }
           }
           if (shouldTestLink) {
-            pendingErrors.add(() async {
-              if (!(await isLinkValid(uri))) {
-                return new Error(ErrorType.brokenLink, label, uri.toString());
-              }
-              return null;
-            }());
+            linksToVerify.add(new Link(uri, label));
           }
         }
         continue;
@@ -184,9 +171,13 @@
     }
   }
 
-  // Resolve all pending errors.
-  errors.addAll(
-      (await Future.wait(pendingErrors)).where((Error error) => error != null));
+  // Verify http links.
+  await verifyLinks(linksToVerify, (Link<String> link, bool isValid) {
+    if (!isValid) {
+      errors.add(
+          new Error(ErrorType.brokenLink, link.payload, link.uri.toString()));
+    }
+  });
 
   // Verify singletons and orphans.
   final List<Node> unreachable = graph.removeSingletons()
diff --git a/tools/doc_checker/lib/link_verifier.dart b/tools/doc_checker/lib/link_verifier.dart
new file mode 100644
index 0000000..13aa5ec
--- /dev/null
+++ b/tools/doc_checker/lib/link_verifier.dart
@@ -0,0 +1,107 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'dart:async';
+import 'dart:io';
+
+import 'package:http/http.dart' as http;
+
+/// A [uri] to verify, tagged with a caller-defined [payload] that is handed
+/// back untouched when the verification result is reported.
+class Link<P> {
+  final Uri uri;
+  final P payload;
+
+  Link(this.uri, this.payload);
+
+  @override
+  String toString() => uri.toString();
+}
+
+/// Called once per link with the outcome of its verification.
+typedef OnElementVerified<P> = void Function(Link<P> link, bool isValid);
+
+/// Verifies every link in [links], invoking [callback] with each result.
+///
+/// Links are grouped by authority: distinct domains are processed
+/// concurrently while the links of a single domain are requested one at a
+/// time, which keeps the request rate per server low.
+Future<Null> verifyLinks<P>(
+    List<Link<P>> links, OnElementVerified<P> callback) async {
+  final Map<String, List<Link<P>>> urisByDomain = {};
+  // Group URLs by domain in order to handle "too many requests" error on a
+  // per-domain basis.
+  for (Link<P> link in links) {
+    urisByDomain.putIfAbsent(link.uri.authority, () => []).add(link);
+  }
+  await Future.wait(urisByDomain.values.map((List<Link<P>> domainLinks) =>
+      new _LinkVerifier<P>(domainLinks).verify(callback)));
+  return null;
+}
+
+/// Sequentially verifies a list of links that share a domain.
+class _LinkVerifier<P> {
+  /// Number of requests attempted per link before giving up on 429 responses.
+  static const int _maxAttempts = 3;
+
+  /// Wait applied when a 429 response carries no usable Retry-After header.
+  static const Duration _defaultRetryDelay = const Duration(milliseconds: 50);
+
+  final List<Link<P>> links;
+
+  _LinkVerifier(this.links);
+
+  /// Verifies [links] in order, reporting each result through [callback].
+  Future<Null> verify(OnElementVerified<P> callback) async {
+    for (Link<P> link in links) {
+      callback(link, await _verifyLink(link));
+    }
+    return null;
+  }
+
+  /// Returns whether a HEAD request to [link] is answered with 200 (OK).
+  ///
+  /// A 429 (Too Many Requests) response is retried after the delay requested
+  /// by the server, for at most [_maxAttempts] requests in total.
+  /// I/O failures are reported as an invalid link.
+  Future<bool> _verifyLink(Link<P> link) async {
+    try {
+      for (int i = 0; i < _maxAttempts; i++) {
+        final http.Response response = await http.head(link.uri);
+        final int code = response.statusCode;
+        if (code == HttpStatus.tooManyRequests) {
+          // Wait asynchronously: a blocking sleep() would stall the whole
+          // isolate and with it the verifiers of every other domain.
+          await new Future<Null>.delayed(
+              _retryDelay(response.headers['retry-after']));
+          continue;
+        }
+        return code == HttpStatus.ok;
+      }
+    } on IOException {
+      // Properly return an invalid link below instead of crashing.
+    }
+    return false;
+  }
+
+  /// Converts a Retry-After header value into the duration to wait.
+  ///
+  /// The header holds either a number of seconds or an HTTP date; when it is
+  /// missing or malformed, [_defaultRetryDelay] is returned.
+  static Duration _retryDelay(String retryAfter) {
+    if (retryAfter == null) {
+      return _defaultRetryDelay;
+    }
+    final String value = retryAfter.trim();
+    final int seconds = int.tryParse(value);
+    if (seconds != null) {
+      return new Duration(seconds: seconds);
+    }
+    try {
+      return HttpDate.parse(value).difference(new DateTime.now());
+    } on HttpException {
+      return _defaultRetryDelay;
+    }
+  }
+}