blob: 15bb637fd937d2ac33484349211c4c243e46120b [file] [log] [blame] [edit]
// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! link_checker implements the [`DocCheck`] trait used to perform checks on the links and images
//! found in markdown documentation in the Fuchsia project.
use crate::{
md_element::{CowStr, Element, LinkType},
DocCheck, DocCheckError, DocCheckerArgs, DocLine,
};
use anyhow::{bail, Result};
use async_trait::async_trait;
use fuchsia_hyper::{new_https_client_from_tcp_options, HttpsClient, TcpOptions};
use http::{uri::Uri, Request, StatusCode};
use hyper::Body;
use std::{
collections::{HashMap, HashSet},
ffi::OsStr,
path::{self, Path, PathBuf},
};
use url::Url;
// path_helper is a wrapper that allows mocking the path checks
// `exists` and `is_dir`.
cfg_if::cfg_if! {
if #[cfg(test)] {
use crate::mock_path_helper_module as path_helper;
} else {
use crate::path_helper_module as path_helper;
}
}
/// Files that are allowed to link to the documentation host site.
/// Entries are compared against the base name (final path component) of the
/// markdown file being checked.
const FILES_ALLOWED_TO_LINK_TO_PUBLISHED_DOCS: [&str; 1] = ["navbar.md"];
/// Host name of the Gerrit source server. Links to this host are validated
/// against `VALID_PROJECTS` and may be resolved to in-tree paths.
const GERRIT_HOST: &str = "fuchsia.googlesource.com";
/// Host name of the site where the documentation is published.
pub(crate) const PUBLISHED_DOCS_HOST: &str = "fuchsia.dev";
/// List of words that cannot be used as single word ALT text for images.
/// This is a pretty small list (n < 5), but if it grows large, it might be
/// better to manage it as an external file vs. inline.
/// The only entry today is the empty string, i.e. missing alt text.
const DISALLOWED_ALT_IMAGE_TEXT: [&str; 1] = [""];
// TODO(fxbug.dev/113039): disallow "drawing", "image" for alt text.
/// List of active repos under fuchsia.googlesource.com which can be linked to.
/// Each entry is compared against the first path segment of a link to that host;
/// a link whose first segment is not listed is reported as
/// "Obsolete or invalid project".
const VALID_PROJECTS: [&str; 19] = [
"", // root page of all projects
"cobalt",
"drivers", // This is a family of projects.
"experiences",
"fargo",
"fidl-misc",
"fidlbolt",
"fontdata",
"fuchsia",
"infra", // This is a family of projects, there are sub-repos below this path.
"integration",
"intellij-language-fidl",
"jiri",
"manifest",
"third_party", // This is a family of projects, there are sub-repos below this path.
"vscode-language-fidl",
"workstation",
"samples",
"sdk-samples", // This is a family of projects, there are sub-repos below this path.
];
/// Top level paths on the published doc site that any page can link to.
/// Links to other locations have to be allowed by adding the source doc
/// page to FILES_ALLOWED_TO_LINK_TO_PUBLISHED_DOCS.
/// Each entry is compared against the first path segment of the link:
/// "" - is the root of fuchsia.dev
/// "reference" is the generated reference documentation.
/// "schema" is the schema URLs used for parsing.
const PUBLISHED_LINKS_ALLOWED: [&str; 3] = ["", "reference", "schema"];
/// A link (URL, or file path) and its location in the markdown.
#[derive(Debug, Eq, Hash, PartialEq)]
pub struct LinkReference {
/// The normalized link text, as produced by `LinkChecker::make_link_to_check`.
pub link: String,
/// The file and line used when reporting errors about this link.
pub location: DocLine,
}
/// LinkChecker checks the links and images in markdown files.
/// As the links are checked, links to external websites are collected and optionally
/// checked in the post-check.
#[derive(Debug)]
struct LinkChecker {
/// Root of the source tree; in-tree link paths are resolved against it.
pub root_dir: PathBuf,
/// Name of the project being checked; compared with the project segment of Gerrit links.
pub project: String,
/// Name of the documentation folder (for example `docs`).
pub docs_folder: PathBuf,
/// When true, links that are not in-tree are collected into `links` for `post_check`.
pub check_remote_links: bool,
/// External links collected by `check` and verified by `post_check`.
links: Vec<LinkReference>,
}
impl LinkChecker {
/// Takes the raw link from the markdown parser, and normalizes it into
/// a string that is a filepath or URL.
fn make_link_to_check(&self, filename: &Path, link_url: &CowStr<'_>) -> Result<String> {
let link_to_check: String;
let link = link_url.trim();
let filename_string = filename.to_string_lossy();
// relative_filename is relative to the root e.g. /home/googler/fuchsia/docs/file.md
// is /docs/file.md. Note that this form (with the leading /) is used a lot because
// when published, it is at the root of the documentation site.
//
// This does cause confusion in this code since PathBuf::join() does not naively join
// paths with a leading /. See https://doc.rust-lang.org/nightly/std/path/struct.PathBuf.html#method.push
// for more details.
let relative_filename =
filename_string.strip_prefix(self.root_dir.to_string_lossy().as_ref()).unwrap_or("");
// relative_parent is the directory of the file name, e.g /docs/file.md is /docs.
let temp_path = PathBuf::from(relative_filename);
let relative_parent = temp_path.parent().unwrap_or(&self.docs_folder);
// External links that have any query parameters are decoded when parsed by the markdown parser.
// To make things easier later, parse the URL and use the encoded parameters.
if link.starts_with("http://") || link.starts_with("https://") {
let url = Url::parse(link).or_else(|e| bail!("Could not parse url {}: {}", link, e))?;
if let Some(query) = url.query() {
// split on ? from the original string, to avoid complexities
// to to-stringizing a url without the query params.
if let Some((first_part, _)) = link.split_once('?') {
let encoded_link = format!("{}?{}", first_part, query);
link_to_check = encoded_link;
} else {
bail!("Cannot parse {}. Appears to have query parameters, but no ? in the string?", link);
}
} else {
link_to_check = url.to_string();
}
} else if link.starts_with("/reference") {
// Generated reference docs are in /reference, and are
// treated as external since they are not part of the source tree.
link_to_check = format!("https://{}{}", PUBLISHED_DOCS_HOST, link);
} else if link.starts_with('/') {
// paths are used as-is.
link_to_check = link.to_string();
} else if link.starts_with('#') {
// Anchors are appended to the current file.
link_to_check = format!("{}{}", self.root_dir.join(relative_filename).display(), link);
} else {
// Otherwise, see if it is parsable as a URI, if not, append it to the relative_parent
// and hope for the best. This usually is something relative like "details-subdir/info.md"
let uri: Uri = match link.parse() {
Ok(u) => u,
Err(_e) => {
let parent_based_link =
format!("{}/{}", relative_parent.to_string_lossy(), link);
parent_based_link.parse::<Uri>().or_else(|e| {
bail!("Cannot parse parent based uri: {}: {}", parent_based_link, e)
})?
}
};
// Check the scheme. If there is one, use it.
// If there is mailto: (which is commonly used without the //) use the original link
// Otherwise, it is a relative file path that what parsed.
link_to_check = match uri.scheme() {
Some(_) => uri.to_string(),
None if link.contains("mailto:") => link.to_string(),
None => format!("{}/{}", relative_parent.to_str().unwrap(), link),
};
}
Ok(link_to_check)
}
}
#[async_trait]
impl DocCheck for LinkChecker {
    /// Returns the display name of this check.
    fn name(&self) -> &str {
        "LinkChecker"
    }

    /// Applies the checks for links.
    ///
    /// Every link or image returned by `element.get_links()` is normalized with
    /// `make_link_to_check` and validated with `do_check_link`. It is then either
    /// verified as an in-tree path or, when remote checking is enabled and no error was
    /// seen for it, queued in `self.links` for `post_check`.
    ///
    /// Email and `mailto:` links are skipped one at a time, so problems found in other
    /// links of the same element are still reported.
    ///
    /// Returns `Ok(None)` when nothing was reported, otherwise `Ok(Some(errors))`.
    ///
    /// # Errors
    /// Propagates an `Err` returned by `do_check_link`.
    fn check<'a>(&mut self, element: &'a Element<'_>) -> Result<Option<Vec<DocCheckError>>> {
        let mut errors: Vec<DocCheckError> = vec![];
        // Get all the links from the element. This is needed since the element is commonly
        // a Block or some other collection of elements.
        if let Some(links) = element.get_links() {
            for ele in links {
                let link: &'a CowStr<'a> = match ele {
                    Element::Link(link_type, link_url, link_title, elements, _) => {
                        let link = match link_type {
                            LinkType::Inline => link_url,
                            LinkType::Reference => link_url,
                            LinkType::ReferenceUnknown => {
                                let text = elements
                                    .iter()
                                    .map(|e| e.get_contents())
                                    .collect::<Vec<String>>()
                                    .join("");
                                if link_url.starts_with("\"") && link_url.ends_with("\"") {
                                    // This is not a link but an array of quoted strings.
                                    continue;
                                }
                                if text == link_url.to_string() && link_url == link_title {
                                    errors.push(DocCheckError::new_info_helpful(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!(
                                            "unescaped [{}] not treating this as a reference link. this is brackets ",
                                            link_url
                                        ),
                                        &format!(
                                            "escaped \\[{}\\] or make a link [{}](/docs/{}",
                                            link_title, link_url, link_url
                                        ),
                                    ));
                                } else {
                                    errors.push(DocCheckError::new_error_helpful(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!("Unknown reference link to [{}][{}]", text, link_url),
                                        &format!(
                                            "making sure you added a matching [{}]: YOUR_LINK_HERE below this reference",
                                            link_url
                                        ),
                                    ));
                                }
                                continue;
                            }
                            LinkType::Collapsed => link_url,
                            LinkType::CollapsedUnknown => {
                                errors.push(DocCheckError::new_error(
                                    ele.doc_line().line_num,
                                    ele.doc_line().file_name.clone(),
                                    &format!(
                                        "Unknown collapsed link to {} ({})",
                                        link_url, link_title
                                    ),
                                ));
                                link_url
                            }
                            LinkType::Shortcut => link_url,
                            LinkType::ShortcutUnknown => {
                                // Check if this is a case where the text is in [].
                                let text = elements
                                    .iter()
                                    .map(|e| e.get_contents())
                                    .collect::<Vec<String>>()
                                    .join("");
                                if link_url.starts_with("\"") && link_url.ends_with("\"") {
                                    // This is not a link but an array of quoted strings.
                                    continue;
                                }
                                if text == link_url.to_string() && link_url == link_title {
                                    errors.push(DocCheckError::new_info_helpful(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!(
                                            "unescaped [{}] not treating this as a shortcut link.",
                                            link_url
                                        ),
                                        &format!(
                                            "escaped \\[{}\\] or make a link [{}](/docs/{}",
                                            link_title, link_url, link_url
                                        ),
                                    ));
                                } else {
                                    errors.push(DocCheckError::new_error_helpful(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!("Unknown reference link to [{}][{}]", text, link_url),
                                        &format!(
                                            "making sure you added a matching [{}]: YOUR_LINK_HERE below this reference",
                                            link_url
                                        ),
                                    ));
                                }
                                continue;
                            }
                            LinkType::Autolink => link_url,
                            // Email links are not checked. Skip only this link so that errors
                            // already collected for the element are not discarded.
                            LinkType::Email => continue,
                        };
                        if link.starts_with("mailto:") {
                            // Nothing to check for mailto links.
                            continue;
                        }
                        link
                    }
                    Element::Image(link_type, link_url, link_title, elements, _) => {
                        let link = match link_type {
                            LinkType::Inline => link_url,
                            LinkType::Reference => link_url,
                            LinkType::ReferenceUnknown => {
                                errors.push(DocCheckError::new_error(
                                    ele.doc_line().line_num,
                                    ele.doc_line().file_name.clone(),
                                    &format!(
                                        "Unknown image reference link to {} ({})",
                                        link_url, link_title
                                    ),
                                ));
                                link_url
                            }
                            LinkType::Collapsed => link_url,
                            LinkType::CollapsedUnknown => {
                                // Report the unresolved reference rather than panicking on
                                // user-provided markdown.
                                errors.push(DocCheckError::new_error(
                                    ele.doc_line().line_num,
                                    ele.doc_line().file_name.clone(),
                                    &format!(
                                        "Unknown image collapsed link to {} ({})",
                                        link_url, link_title
                                    ),
                                ));
                                link_url
                            }
                            LinkType::Shortcut => link_url,
                            LinkType::ShortcutUnknown => {
                                // Check if this is a case where the text is in [].
                                let text = elements
                                    .iter()
                                    .map(|e| e.get_contents())
                                    .collect::<Vec<String>>()
                                    .join("");
                                if text == link_url.to_string() && link_url == link_title {
                                    errors.push(DocCheckError::new_info_helpful(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!(
                                            "unescaped brackets - treated as text [{}]",
                                            link_url
                                        ),
                                        &format!(
                                            "escaping \\[{}\\] or an inline link [{}][/docs/{}",
                                            link_title, link_url, link_url
                                        ),
                                    ));
                                    continue;
                                } else {
                                    errors.push(DocCheckError::new_error(
                                        ele.doc_line().line_num,
                                        ele.doc_line().file_name.clone(),
                                        &format!(
                                            "Unknown image shortcut link to {} ({})",
                                            link_url, link_title
                                        ),
                                    ));
                                }
                                link_url
                            }
                            LinkType::Autolink => link_url,
                            // See the Email arm for links: skip this image only.
                            LinkType::Email => continue,
                        };
                        if link.starts_with("mailto:") {
                            // Nothing to check for mailto links.
                            continue;
                        }
                        // Check for alt text.
                        let alt = ele.get_contents().trim().to_string();
                        if DISALLOWED_ALT_IMAGE_TEXT.contains(&alt.as_str()) {
                            errors.push(DocCheckError::new_error(
                                ele.doc_line().line_num,
                                ele.doc_line().file_name,
                                &format!(
                                    "Invalid image alt text: {:?}, cannot be one of {:?}",
                                    alt, DISALLOWED_ALT_IMAGE_TEXT
                                ),
                            ));
                        }
                        link
                    }
                    // Anything that is neither a link nor an image has nothing to check.
                    _ => continue,
                };

                let link_to_check =
                    match self.make_link_to_check(&element.doc_line().file_name, link) {
                        Ok(link) => link,
                        Err(e) => {
                            errors.push(DocCheckError::new_error(
                                element.doc_line().line_num,
                                element.doc_line().file_name,
                                &e.to_string(),
                            ));
                            String::from("")
                        }
                    };
                if link_to_check.is_empty() {
                    continue;
                }

                // Links that already produced an error are not queued for the remote check.
                let saw_error = do_check_link(&element.doc_line(), &link_to_check, &self.project)?
                    .map(|err| errors.push(err))
                    .is_some();

                let root_dir = self.root_dir.display().to_string();
                match is_intree_link(&self.project, &root_dir, &self.docs_folder, &link_to_check) {
                    Ok(Some(in_tree_path)) => {
                        if let Some(link_error) = do_in_tree_check(
                            &element.doc_line(),
                            &self.root_dir,
                            &self.docs_folder,
                            &link_to_check,
                            &in_tree_path,
                        ) {
                            errors.push(link_error);
                        }
                    }
                    Ok(None) => {
                        if self.check_remote_links && !saw_error {
                            self.links.push(LinkReference {
                                link: link_to_check.clone(),
                                location: element.doc_line(),
                            });
                        }
                    }
                    Err(e) => errors.push(DocCheckError::new_error(
                        element.doc_line().line_num,
                        element.doc_line().file_name,
                        &e.to_string(),
                    )),
                };
            }
        }
        if errors.is_empty() {
            Ok(None)
        } else {
            Ok(Some(errors))
        }
    }

    /// At the end, check that the out of tree links work, if requested.
    ///
    /// Only links collected by `check` are requested; that list stays empty unless
    /// `check_remote_links` is set. Returns `Ok(None)` when no errors were found.
    async fn post_check(&self) -> Result<Option<Vec<DocCheckError>>> {
        let link_errors = check_external_links(&self.links).await;
        Ok(link_errors.filter(|errors| !errors.is_empty()))
    }
}
/// Validates a link that resolves to a path inside the source tree.
///
/// * `doc_line` - location used for any reported error.
/// * `root_dir` - root of the source tree; `in_tree_path` is resolved below it.
/// * `docs_folder` - name of the documentation folder.
/// * `link_to_check` - the link text, used only in messages.
/// * `in_tree_path` - the path of the link relative to the root (usually with a leading `/`).
///
/// Returns an error when:
/// * the path does not exist,
/// * the path contains `..` and cannot be canonicalized or resolves outside `root_dir`,
/// * the path is a directory directly under the docs folder position and has no `README.md`.
///
/// Returns `None` otherwise.
pub(crate) fn do_in_tree_check(
    doc_line: &DocLine,
    root_dir: &Path,
    docs_folder: &Path,
    link_to_check: &str,
    in_tree_path: &Path,
) -> Option<DocCheckError> {
    // All failures are reported at the same location; only the message differs.
    let report = |message: String| {
        Some(DocCheckError::new_error(doc_line.line_num, doc_line.file_name.clone(), &message))
    };

    // Drop the leading "/" so that join() appends instead of replacing root_dir.
    let below_root = in_tree_path.strip_prefix("/").unwrap_or(in_tree_path);
    let filepath = root_dir.join(below_root);

    if !path_helper::exists(&filepath) {
        return report(format!(
            "in-tree link to {} could not be found at {:?}",
            link_to_check, filepath
        ));
    }

    // Paths containing ".." are only checked for escaping the root directory.
    if filepath.components().any(|c| c == path::Component::ParentDir) {
        return match filepath.canonicalize() {
            Err(e) => report(format!("Error canonicalizing path: {:?}: {}", filepath, e)),
            Ok(resolved) if !resolved.starts_with(root_dir) => report(format!(
                "relative path {:?} points outside root directory {:?}",
                in_tree_path, root_dir
            )),
            Ok(_) => None,
        };
    }

    if !path_helper::is_dir(&filepath) {
        return None;
    }

    // If it is a directory in the /docs directory, that directory needs
    // to have a README.md file. Non-docs paths are OK.
    let docs_position = in_tree_path
        .components()
        .position(|c| c == path::Component::Normal(OsStr::new(&docs_folder)));
    if docs_position != Some(1) {
        return None;
    }

    let readme_path = filepath.join("README.md");
    if path_helper::exists(&readme_path) {
        return None;
    }
    report(format!(
        "in-tree link to {} could not be found at {:?} or {:?}",
        link_to_check, filepath, readme_path
    ))
}
/// Parses `link` as a URI and, for `http`/`https` links, validates it for the host it
/// points to.
///
/// * `doc_line` - location used for any reported error.
/// * `link` - the normalized link.
/// * `project_being_checked` - forwarded to `check_link_authority`.
///
/// Returns `Ok(None)` for links without a scheme, with a scheme other than `http`/`https`,
/// or when no problem is found. Returns `Ok(Some(error))` when the link cannot be parsed,
/// when `check_link_authority` reports a problem, or when a host ending in `.dev` or
/// `.google.com` is given a query containing `hl=`.
pub(crate) fn do_check_link(
    doc_line: &DocLine,
    link: &str,
    project_being_checked: &str,
) -> Result<Option<DocCheckError>> {
    let uri = match link.parse::<Uri>() {
        Ok(parsed) => parsed,
        Err(e) => {
            return Ok(Some(DocCheckError::new_error(
                doc_line.line_num,
                doc_line.file_name.clone(),
                &format!("Invalid link {} : {}", link, e),
            )));
        }
    };

    // Only web links are inspected any further.
    let is_web_link =
        uri.scheme().map_or(false, |scheme| matches!(scheme.as_str(), "http" | "https"));
    if !is_web_link {
        return Ok(None);
    }

    if let Some(authority_error) = check_link_authority(doc_line, &uri, project_being_checked) {
        return Ok(Some(authority_error));
    }

    // Check for host language parameter on google owned urls.
    let google_owned_host = uri.authority().map_or(false, |authority| {
        let host = authority.host();
        host.ends_with(".dev") || host.ends_with(".google.com")
    });
    let has_language_param = uri.query().map_or(false, |query| query.contains("hl="));
    if google_owned_host && has_language_param {
        return Ok(Some(DocCheckError::new_error(
            doc_line.line_num,
            doc_line.file_name.clone(),
            &format!("Do not add host language parameter `hl` {}", link),
        )));
    }
    Ok(None)
}
/// Returns the relative path from the root_dir if the link is to a file in the fuchsia source tree.
///
/// * `project` - name of the project being checked (e.g. `fuchsia`).
/// * `root_dir` - root of the source tree, as a string.
/// * `docs_folder` - name of the documentation folder.
/// * `link_to_check` - the normalized link.
///
/// Three kinds of links are recognized:
/// * links that start with `root_dir`: the remainder, without fragment and query, is returned;
/// * links to `https://fuchsia.googlesource.com/<project>...`: `+.../refs/heads/<branch>/<path>`
///   and `+.../HEAD/<docs_folder>/...` are mapped to `/<path>`, other `+` forms and the
///   project root are treated as not in-tree, and URLs without a `+` segment return
///   the URL path unchanged;
/// * links that start with `/`: the path, without fragment and query, is normalized.
///
/// Returns `Ok(None)` for everything else.
///
/// # Errors
/// Returns an error when a Gerrit link cannot be parsed as a URI, or when a `/` path
/// uses `..` to climb above the root.
pub(crate) fn is_intree_link(
    project: &str,
    root_dir: &str,
    docs_folder: &Path,
    link_to_check: &str,
) -> Result<Option<PathBuf>> {
    if let Some(rest) = link_to_check.strip_prefix(root_dir) {
        // Drop the fragment first, then the query parameters, if any.
        let without_fragment = rest.split_once('#').map_or(rest, |(head, _)| head);
        let filepath =
            without_fragment.split_once('?').map_or(without_fragment, |(head, _)| head);
        return Ok(Some(PathBuf::from(filepath)));
    }

    if link_to_check.starts_with(&format!("https://{}/{}", GERRIT_HOST, project)) {
        let uri: Uri = match link_to_check.parse::<Uri>() {
            Ok(uri) => uri,
            Err(e) => bail!("Invalid Url {}: {:?}", link_to_check, e),
        };
        // The path starts with "/", so parts[0] is empty and parts[1] is the project.
        let parts = uri.path().split('/').collect::<Vec<&str>>();
        if parts.len() <= 3 {
            let p = parts.join("/");
            if p == format!("/{}/", project) || p == format!("/{}", project) {
                return Ok(None);
            }
        }
        // Skip over any branch spec. parts[2] is +, or +show, or +log, etc.
        if parts.len() > 3 && parts[2].starts_with('+') {
            let filepath = PathBuf::from("/");
            // Use get() for everything past parts[3]: URLs such as ".../+/HEAD" or
            // ".../+/refs" have no further segments and must not panic.
            let fifth = parts.get(4).copied();
            if parts[3] == "refs" && fifth == Some("heads") {
                // parts[5] is the branch name; the file path starts at parts[6].
                return Ok(parts.get(6..).map(|path_parts| filepath.join(path_parts.join("/"))));
            }
            let docs_name = docs_folder.to_string_lossy();
            if parts[3] == "HEAD" && fifth == Some(&*docs_name) {
                return Ok(Some(filepath.join(parts[4..].join("/"))));
            }
            return Ok(None);
        }
        return Ok(Some(PathBuf::from(parts.join("/"))));
    }

    if link_to_check.starts_with('/') {
        let without_fragment =
            link_to_check.split_once('#').map_or(link_to_check, |(head, _)| head);
        let filepath =
            without_fragment.split_once('?').map_or(without_fragment, |(head, _)| head);
        return normalize_intree_path(filepath).map(Some);
    }
    Ok(None)
}
/// Checks that URLs are not incorrectly pointing to fuchsia.dev or fuchsia.googlesource.com
///
/// * `doc_line` - location used for any reported error.
/// * `uri` - the parsed link.
/// * `project_being_checked` - name of the project whose docs are being checked.
///
/// Returns an error when:
/// * a Gerrit link names a project that is not in `VALID_PROJECTS`,
/// * a Gerrit link into the project being checked points at a markdown file under `docs`
///   on a branch that `on_gerrit_master` does not accept,
/// * a link to the published docs site goes to `fuchsia-src` from a file that is not in
///   `FILES_ALLOWED_TO_LINK_TO_PUBLISHED_DOCS`.
///
/// Returns `None` otherwise.
fn check_link_authority(
    doc_line: &DocLine,
    uri: &Uri,
    project_being_checked: &str,
) -> Option<DocCheckError> {
    let link_to_fuchsia_gerrit_host = uri.authority().map_or(false, |a| *a == GERRIT_HOST);
    let link_to_published_docs_host = uri.authority().map_or(false, |a| *a == PUBLISHED_DOCS_HOST);

    // The first path segment is the project (Gerrit) or the top level section (fuchsia.dev).
    // A path without that segment is treated like the root.
    let parts = uri.path().split('/').collect::<Vec<&str>>();
    let project = parts.get(1).copied().unwrap_or("");

    // Links to gerrit source code should be to HEAD or refs/heads/main or equivalent.
    // The links can also not be to unknown or obsolete projects.
    if link_to_fuchsia_gerrit_host {
        if !VALID_PROJECTS.contains(&project) {
            return Some(DocCheckError::new_error(
                doc_line.line_num,
                doc_line.file_name.clone(),
                &format!("Obsolete or invalid project {}: {}", project, uri),
            ));
        }
        if !on_gerrit_master(uri) && project == project_being_checked {
            // Possible point of discussion: Non-HEAD links are open discussion for non-docs
            // links. Allow files that are not markdown, such as OWNERS, to be links.
            if parts.contains(&"docs") && uri.path().ends_with(".md") {
                // Work out the file path after the branch spec. Slice patterns are used so
                // that truncated URLs such as ".../+/refs" cannot index out of bounds.
                let after_branch_marker: &[&str] = match parts.iter().position(|x| *x == "+") {
                    Some(branch_index) => &parts[branch_index + 1..],
                    None => &[],
                };
                let file_path = match after_branch_marker {
                    // refs/heads/<branch>/<path...>
                    ["refs", "heads", _, path_parts @ ..] => path_parts.join("/"),
                    // refs/heads with nothing after it: no file path.
                    ["refs", "heads", ..] => String::new(),
                    // <branch>/<path...>
                    [_, path_parts @ ..] => path_parts.join("/"),
                    [] => String::new(),
                };
                return Some(DocCheckError::new_error_helpful(
                    doc_line.line_num,
                    doc_line.file_name.clone(),
                    &format!("Invalid link to non-main branch: {}", uri),
                    &format!("filepath: /{}", file_path),
                ));
            }
        }
    }

    // Links to fuchsia.dev (where the docs are published) should not use http(s):, but
    // rather use relative paths from the fuchsia source root. For example, /docs/some/file.md.
    // Some projects in fuchsia.dev are allowed to be directly linked since they are not
    // part of the markdown checked in.
    // There is also a list of exceptions of files that can link via https, these allow gitiles
    // to point to fuchsia.dev, and could be removed at some point in the future.
    if link_to_published_docs_host && !PUBLISHED_LINKS_ALLOWED.contains(&project) {
        let base_name = doc_line
            .file_name
            .file_name()
            .unwrap_or_else(|| OsStr::new(""))
            .to_string_lossy()
            .to_string();
        if !FILES_ALLOWED_TO_LINK_TO_PUBLISHED_DOCS.contains(&base_name.as_str()) {
            // If the link is to the published docs directory (fuchsia-src), then
            // a path should be used instead.
            if parts.contains(&"fuchsia-src") {
                return Some(DocCheckError::new_error(
                    doc_line.line_num,
                    doc_line.file_name.clone(),
                    &format!(
                        "Should not link to {} via {}, use relative filepath",
                        uri,
                        uri.scheme_str().unwrap_or_default()
                    ),
                ));
            }
        }
    }
    None
}
/// Checks whether the URI points to the master branch of a Gerrit (i.e.,
/// googlesource.com) project.
///
/// Links to gerrit are of the form:
///   https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/docs/README.md
///   https://fuchsia.googlesource.com/<project>/+/refs/heads/<branch>/<path>
///   https://fuchsia.googlesource.com/<project>/+/<branch>/<path>
///
/// Returns true when:
/// * the path has no `+` segment, or nothing after it (the link defaults to main),
/// * the segment after `+` is `master`, `main` or `HEAD`,
/// * the three segments after `+` are `refs/heads/main` or `refs/heads/master`.
///
/// Returns false for every other branch or ref.
fn on_gerrit_master(uri: &Uri) -> bool {
    // Up to three path segments following the "+" branch marker.
    let path_segments: Vec<&str> =
        uri.path().split('/').skip_while(|p| p != &"+").skip(1).take(3).collect();
    match path_segments.as_slice() {
        // No + branch spec in the URL, so defaults to main.
        [] => true,
        ["master" | "main" | "HEAD", ..] => true,
        // There can be refs/ to other things, so require the full three segment ref.
        ["refs", "heads", "main" | "master"] => true,
        _ => false,
    }
}
fn normalize_intree_path(filepath: &str) -> Result<PathBuf> {
let orig = PathBuf::from(filepath);
let mut normalized = PathBuf::new();
let segments = orig.components();
for part in segments {
match part {
std::path::Component::Prefix(p) => {
// Prefix is used on Windows systems and is
// part of the path that is not part of normalizing
// the path.
// For Non-windows, it should not appear.
eprintln!("Unexpected path component {:?}", p);
}
std::path::Component::RootDir => {
//RootDir is the beginning of the Path, after
// any Prefix.
normalized.push("/");
}
std::path::Component::CurDir => {
//CurDir is the current directory, "."
// it is ignored.
}
std::path::Component::ParentDir => {
// ParentDir is the parent of the current item, ".."
if !normalized.pop() {
bail!("Cannot normalize {}, references parent beyond root.", filepath);
}
}
std::path::Component::Normal(p) => normalized.push(p),
}
}
Ok(normalized)
}
/// Requests every link in `links` and reports the ones that fail.
///
/// Links are grouped by URI authority so that all requests to one host are issued
/// together (to take advantage of keep alive). Links that cannot be parsed as a URI, or
/// that have no authority, are reported without being requested.
///
/// Returns `None` when every link was fine, otherwise `Some` with the collected errors.
pub async fn check_external_links(links: &Vec<LinkReference>) -> Option<Vec<DocCheckError>> {
    let mut errors = vec![];

    // authority -> set of links pointing to that authority.
    let mut links_by_authority: HashMap<String, HashSet<&LinkReference>> = HashMap::new();
    for reference in links {
        match reference.link.parse::<Uri>() {
            Err(e) => errors.push(DocCheckError::new_error(
                reference.location.line_num,
                reference.location.file_name.clone(),
                &format!("Error parsing {}: {}", reference.link, e),
            )),
            Ok(uri) => match uri.authority() {
                Some(authority) => {
                    links_by_authority
                        .entry(authority.to_string())
                        .or_default()
                        .insert(reference);
                }
                None => errors.push(DocCheckError::new_error(
                    reference.location.line_num,
                    reference.location.file_name.clone(),
                    &format!("Error parsing {}: no authority found", reference.link),
                )),
            },
        }
    }

    let client: HttpsClient = new_https_client_from_tcp_options(tcp_options());
    for (authority, group) in links_by_authority {
        println!("checking {authority} {link_count} links", link_count = group.len());
        // Start all requests for this authority, then wait for the whole batch.
        let pending_requests: Vec<_> =
            group.into_iter().map(|reference| check_url_link(client.clone(), reference)).collect();
        let outcomes = futures::future::join_all(pending_requests).await;
        errors.extend(outcomes.into_iter().flatten());
    }

    if errors.is_empty() {
        None
    } else {
        Some(errors)
    }
}
/// Issues a GET request for `link` and checks that the response status is 200, 301 or 302.
///
/// Returns `None` when the link is fine. Returns an error located at `link.location` when
/// the request cannot be built, the request fails, or any other status is returned.
async fn check_url_link(client: HttpsClient, link: &LinkReference) -> Option<DocCheckError> {
    let request = match Request::get(&link.link).body(Body::from("")) {
        Ok(built) => built,
        Err(e) => {
            return Some(DocCheckError::new_error(
                link.location.line_num,
                link.location.file_name.clone(),
                &format!("Error {} requesting {}", e, link.link),
            ));
        }
    };

    // Work out the failure message, leaving early when the link is reachable.
    let message = match client.request(request).await {
        Ok(response) => {
            let status = response.status();
            if matches!(
                status,
                StatusCode::OK | StatusCode::FOUND | StatusCode::MOVED_PERMANENTLY
            ) {
                return None;
            }
            format!("Error response {} reading {}", status, &link.link)
        }
        Err(e) => format!("Error {} reading {}", e, link.link),
    };
    Some(DocCheckError::new_error(
        link.location.line_num,
        link.location.file_name.clone(),
        &message,
    ))
}
fn tcp_options() -> TcpOptions {
let mut options: TcpOptions = std::default::Default::default();
// Use TCP keepalive to notice stuck connections.
// After 60s with no data received send a probe every 15s.
options.keepalive_idle = Some(std::time::Duration::from_secs(60));
options.keepalive_interval = Some(std::time::Duration::from_secs(15));
// After 8 probes go unacknowledged treat the connection as dead.
options.keepalive_count = Some(8);
options
}
/// Called from main to register all the checks to perform which are implemented in this module.
///
/// Builds a single `LinkChecker` from the command line arguments. Remote links are
/// checked unless `local_links_only` is set.
pub(crate) fn register_markdown_checks(opt: &DocCheckerArgs) -> Result<Vec<Box<dyn DocCheck>>> {
    let link_checker: Box<dyn DocCheck> = Box::new(LinkChecker {
        root_dir: opt.root.clone(),
        project: opt.project.clone(),
        docs_folder: opt.docs_folder.clone(),
        check_remote_links: !opt.local_links_only,
        links: Vec::new(),
    });
    Ok(vec![link_checker])
}
#[cfg(test)]
mod tests {
use super::*;
use crate::DocContext;
/// Checks that raw links are normalized into the expected URL or path.
#[test]
fn test_make_link_to_check() -> Result<()> {
    let source_file = PathBuf::from("/my/root/fuchsia/docs/index.md");
    let checker = LinkChecker {
        root_dir: PathBuf::from("/my/root/fuchsia"),
        project: "fuchsia".to_string(),
        docs_folder: PathBuf::from("docs"),
        check_remote_links: false,
        links: vec![],
    };

    // (raw link as written in the markdown, expected normalized link)
    let cases = [
        ("README.md", "/docs/README.md"),
        ("https://my-server.com", "https://my-server.com/"),
        (
            "http://my-server.com/page?qp=1&words=one two three",
            "http://my-server.com/page?qp=1&words=one%20two%20three",
        ),
        ("/docs/some-file.md", "/docs/some-file.md"),
        ("#Anchor-name", "/docs/index.md#Anchor-name"),
        ("path/to/sub/info.md", "/docs/path/to/sub/info.md"),
        ("mailto:someone@somewhere.tld", "mailto:someone@somewhere.tld"),
        ("https:///bad-url?x=", "https:///bad-url?x="),
        ("/reference/to/generated.md", "https://fuchsia.dev/reference/to/generated.md"),
    ];

    for (raw_link, normalized) in cases {
        let raw = CowStr::Borrowed(raw_link);
        let actual = checker.make_link_to_check(source_file.as_path(), &raw)?;
        assert_eq!(actual, normalized);
    }
    Ok(())
}
/// Checks that `.` and `..` components are resolved lexically.
#[test]
fn test_normalize_intree_path() -> Result<()> {
    // (input path, expected normalized path)
    let cases = [
        ("/docs", "/docs"),
        ("/docs/../docs", "/docs"),
        ("/docs/sub/location.md", "/docs/sub/location.md"),
        ("/docs/sub/two/../location.md", "/docs/sub/location.md"),
        ("/docs/sub/./location.md", "/docs/sub/location.md"),
    ];
    for (input, normalized) in cases {
        let expected = PathBuf::from(normalized);
        let actual = normalize_intree_path(input)?;
        assert_eq!(actual, expected);
    }
    Ok(())
}
/// Checks that a well-formed inline link to an existing in-tree file reports no errors.
#[test]
fn test_happy_path() -> Result<()> {
    let opt = DocCheckerArgs {
        root: PathBuf::from("/path/to/fuchsia"),
        project: "fuchsia".to_string(),
        docs_folder: PathBuf::from("docs"),
        local_links_only: true,
        check_reference_links: false,
    };
    let mut checks = register_markdown_checks(&opt)?;
    assert_eq!(checks.len(), 1);

    // The closing parenthesis is required: without it the markdown contains no link at
    // all and the loop below would pass without checking anything.
    let ctx = DocContext::new(
        PathBuf::from("/docs/README.md"),
        "This is a line to [something](/docs/something.md)",
    );
    if let Some(check) = checks.first_mut() {
        for ele in ctx {
            let errors = check.check(&ele)?;
            assert!(errors.is_none(), "expected none, got {:?}", errors);
        }
    }
    Ok(())
}
/// Checks which links are recognized as in-tree, and the path they resolve to.
#[test]
fn test_is_in_tree_link() -> Result<()> {
    let project = "fuchsia";
    let root_dir = "/some/root/dir";
    let docs_folder = PathBuf::from("docs");

    // (link, expected in-tree path; None when the link is not in-tree)
    let cases: Vec<(&str, Option<&str>)> = vec![
        ("/docs/README.md", Some("/docs/README.md")),
        ("/docs/somewhere/file.md#header1", Some("/docs/somewhere/file.md")),
        ("https://google.com", None),
        ("mailto:someone@email.com", None),
        ("/src/to/a/program.cc", Some("/src/to/a/program.cc")),
        ("https://fuchsia.googlesource.com/fuchsia/+/HEAD/sdk/lib/fdio", None),
        (
            "https://fuchsia.googlesource.com/fuchsia/docs/README.md",
            Some("/fuchsia/docs/README.md"),
        ),
        (
            "https://fuchsia.googlesource.com/fuchsia/+/7461d8882167e7a9d1b494e3b1734d2c063830fc/build/package.gni#604",
            None,
        ),
        (
            "https://fuchsia.googlesource.com/fuchsia/+show/HEAD/docs/concepts/kernel/_toc.yaml",
            Some("/docs/concepts/kernel/_toc.yaml"),
        ),
        ("https://fuchsia.googlesource.com/fuchsia", None),
        ("https://fuchsia.googlesource.com/fuchsia/", None),
        // Since this is not to the /docs dir, it should not be an in-tree link.
        (
            "https://fuchsia.googlesource.com/fuchsia/+log/d381548c6aef76926e6203a2ad2265dd510d1e9b",
            None,
        ),
    ];

    for (link_to_check, expected_path) in cases {
        let expected = expected_path.map(PathBuf::from);
        let result = is_intree_link(project, root_dir, &docs_folder, link_to_check)?;
        assert_eq!(result, expected);
    }
    Ok(())
}
/// Runs the registered checker over a table of markdown snippets and compares the
/// reported errors, in order, with the expected ones.
///
/// Each table entry pairs a document with the errors expected from it; `None` means
/// that no errors are expected.
#[test]
fn test_check() -> Result<()> {
let opt = DocCheckerArgs {
root: PathBuf::from("/path/to/fuchsia"),
project: "fuchsia".to_string(),
docs_folder: PathBuf::from("docs"),
local_links_only: true,
check_reference_links: false,
};
let mut checks = register_markdown_checks(&opt)?;
assert_eq!(checks.len(), 1);
let test_data: Vec<(DocContext<'_>, Option<Vec<DocCheckError>>)> = vec![
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"This is a line to [something](/docs/something.md)",
),
None,
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"invalid image text ![](/docs/something.png)",
),
Some(
[DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Invalid image alt text: \"\", cannot be one of [\"\"]"),
DocCheckError::new_error(1,PathBuf::from("/docs/README.md"),
"in-tree link to /docs/something.png could not be found at \"/path/to/fuchsia/docs/something.png\"")].to_vec()),
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"invalid url [oops](https:///nowhere/something.md?xx)",
),
Some([DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Invalid link https:///nowhere/something.md?xx : invalid format")].to_vec())
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"relative path outside root [oops](/docs/../../illegal.md)",
),
Some([DocCheckError::new_error(1,PathBuf::from("/docs/README.md"),
"Cannot normalize /docs/../../illegal.md, references parent beyond root.")].to_vec())
),
// No error is expected here: the `hl` check in do_check_link only applies to hosts
// ending in ".dev" or ".google.com", and "google.com" matches neither.
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"hl param is not allowed [hl](https://google.com/something?hl=en)",
),
None,
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"invalid project link [garnet](https://fuchsia.googlesource.com/garnet/+/HEAD/src/file.cc)",
),
Some([DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Obsolete or invalid project garnet: https://fuchsia.googlesource.com/garnet/+/HEAD/src/file.cc")].to_vec())
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"A reference link to [`topaz`][flutter-gni]\n\n\
[flutter-gni]: https://fuchsia.googlesource.com/topaz/+/HEAD/runtime/flutter_runner/flutter_app.gni \"Flutter GN build template\""
),
Some([DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Obsolete or invalid project topaz: https://fuchsia.googlesource.com/topaz/+/HEAD/runtime/flutter_runner/flutter_app.gni")].to_vec())
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"non-master branch link to docs [old doc](https://fuchsia.googlesource.com/fuchsia/+/some-branch/docs/file.md)",
),
Some([DocCheckError::new_error_helpful(1,PathBuf::from("/docs/README.md"),
"Invalid link to non-main branch: https://fuchsia.googlesource.com/fuchsia/+/some-branch/docs/file.md","filepath: /docs/file.md")].to_vec())
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"non-master branch link to src ok [old source](https://fuchsia.googlesource.com/fuchsia/+/some-branch/tools/file.cc)",
),
None,
),
(
DocContext::new(
PathBuf::from("/docs/README.md"),
"non-markdown file OK to link to docs [non-source](https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/docs/OWNERS)",
),
None,
)
];
for (ctx, expected_errors) in test_data {
for ele in ctx {
let errors = checks[0].check(&ele)?;
if let Some(ref expected_list) = expected_errors {
// Actual and expected errors must match pairwise and in the same order.
let mut expected_iter = expected_list.iter();
if let Some(actual_errors) = errors {
for actual in actual_errors {
if let Some(expected) = expected_iter.next() {
assert_eq!(&actual, expected);
} else {
panic!("Got unexpected error returned: {:?}", actual);
}
}
// Anything left in the iterator was expected but never reported.
let unused_errors: Vec<&DocCheckError> = expected_iter.collect();
if !unused_errors.is_empty() {
panic!("Expected more errors: {:?}", unused_errors);
}
// NOTE(review): this condition is always true inside the enclosing `if let Some`.
} else if expected_errors.is_some() {
panic!("No errors, but expected {:?}", expected_errors);
}
} else if errors.is_some() {
panic!("Got unexpected errors {:?}", errors.unwrap());
}
}
}
Ok(())
}
/// Checks the in-tree validation for an existing file and for a docs directory that
/// has no README.md.
#[test]
fn test_do_intree_check() -> Result<()> {
    let root_dir = PathBuf::from("/path/to/fuchsia");
    let docs_folder = PathBuf::from("docs");
    let doc_line = DocLine { line_num: 1, file_name: PathBuf::from("some/file.md") };

    let missing_readme = DocCheckError::new_error(
        1,
        PathBuf::from("some/file.md"),
        "in-tree link to /docs/no_readme could not be found at \"/path/to/fuchsia/docs/no_readme\" or \"/path/to/fuchsia/docs/no_readme/README.md\"",
    );
    // (link text, in-tree path, expected error)
    let cases = [
        ("/docs/exists/something.md", "/docs/exists/something.md", None),
        ("/docs/no_readme", "/docs/no_readme", Some(missing_readme)),
    ];

    for (link_to_check, in_tree_path, expected_error) in cases {
        let in_tree_path = PathBuf::from(in_tree_path);
        let actual =
            do_in_tree_check(&doc_line, &root_dir, &docs_folder, link_to_check, &in_tree_path);
        assert_eq!(actual, expected_error);
    }
    Ok(())
}
/// Same table-driven comparison as `test_check`, but with `check_reference_links` set
/// and documents created through `DocContext::new_with_checks(.., true)`, so that
/// unresolved bracketed text is reported.
#[test]
fn test_check_reference_links() -> Result<()> {
let opt = DocCheckerArgs {
root: PathBuf::from("/path/to/fuchsia"),
project: "fuchsia".to_string(),
docs_folder: PathBuf::from("docs"),
local_links_only: true,
check_reference_links: true,
};
let mut checks = register_markdown_checks(&opt)?;
assert_eq!(checks.len(), 1);
let test_data: Vec<(DocContext<'_>, Option<Vec<DocCheckError>>)> = vec![
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
"This is a line to [something](/docs/something.md)",
true
),
None,
),
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
"invalid url [oops](https:///nowhere/something.md?xx)",
true
),
Some([DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Invalid link https:///nowhere/something.md?xx : invalid format")].to_vec())
),
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
"A reference link to [`topaz`][flutter-gni]\n\n\
[flutter-gni]: https://fuchsia.googlesource.com/topaz/+/HEAD/runtime/flutter_runner/flutter_app.gni \"Flutter GN build template\"",
true
),
Some([DocCheckError::new_error(1, PathBuf::from("/docs/README.md"),
"Obsolete or invalid project topaz: https://fuchsia.googlesource.com/topaz/+/HEAD/runtime/flutter_runner/flutter_app.gni")].to_vec())
),
// Plain bracketed text is reported as an informational message, not an error.
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
"brackets which are not a link [your name here]",
true
),
Some([DocCheckError::new_info_helpful(1, PathBuf::from("/docs/README.md"),
"unescaped [your name here] not treating this as a shortcut link.",
"escaped \\[your name here\\] or make a link [your name here](/docs/your name here")
].to_vec()),
),
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
"missing [text][link-to-text]", true),
Some([DocCheckError::new_error_helpful(1, PathBuf::from("/docs/README.md"),
"Unknown reference link to [text][link-to-text]",
"making sure you added a matching [link-to-text]: YOUR_LINK_HERE below this reference"
)].to_vec())
),
// A bracketed, quoted string is treated as an array literal rather than a link.
(
DocContext::new_with_checks(
PathBuf::from("/docs/README.md"),
r#"pw_toolchain_STATIC_ANALYSIS_SKIP_INCLUDE_PATHS = [".*/third_party/.*"]"#,
true
),
None
),
];
for (ctx, expected_errors) in test_data {
for ele in ctx {
let errors = checks[0].check(&ele)?;
if let Some(ref expected_list) = expected_errors {
// Actual and expected errors must match pairwise and in the same order.
let mut expected_iter = expected_list.iter();
if let Some(actual_errors) = errors {
for actual in actual_errors {
if let Some(expected) = expected_iter.next() {
assert_eq!(&actual, expected);
} else {
panic!("Got unexpected error returned: {:?}", actual);
}
}
// Anything left in the iterator was expected but never reported.
let unused_errors: Vec<&DocCheckError> = expected_iter.collect();
if !unused_errors.is_empty() {
panic!("Expected more errors: {:?}", unused_errors);
}
// NOTE(review): this condition is always true inside the enclosing `if let Some`.
} else if expected_errors.is_some() {
panic!("No errors, but expected {:?}", expected_errors);
}
} else if errors.is_some() {
panic!("Got unexpected errors {:?}", errors.unwrap());
}
}
}
Ok(())
}
}