| // Copyright 2022 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| //! doc_checker is a CLI tool to check markdown files for correctness in |
| //! the Fuchsia project. |
| |
| pub(crate) use crate::checker::{DocCheck, DocCheckError, DocLine, DocYamlCheck, ErrorLevel}; |
| pub(crate) use crate::md_element::DocContext; |
| use anyhow::{bail, Context, Result}; |
| use argh::FromArgs; |
| use glob::glob; |
| use serde_yaml::Value; |
| use std::fs::{self, File}; |
| use std::io::BufReader; |
| use std::path::PathBuf; |
| mod checker; |
| mod include_checker; |
| mod link_checker; |
| mod md_element; |
| mod parser; |
| mod yaml; |
| |
| // path_helper includes methods to check path attributes |
| // so that these methods can be mocked for unit tests. |
| pub mod path_helper_module { |
| |
| use std::path::Path; |
| pub fn exists(path: &Path) -> bool { |
| path.exists() |
| } |
| pub fn is_dir(path: &Path) -> bool { |
| path.is_dir() |
| } |
| } |
| |
| // Used when testing, the existence and is_dir |
| // is based on the name. |
| // Originally, This was attempted using mockall::automock. |
| // However, since the mocked module is global, and tests |
| // are run multithreaded, keeping the mocking context exclusive |
| // was hard. A mutex could be used, but mutex and async code |
| // do not work nicely together. |
| // |
| // As a result, a name based mock is used for all the tests. |
| #[cfg(test)] |
| pub(crate) mod mock_path_helper_module { |
| use std::path::Path; |
| pub fn exists(path: &Path) -> bool { |
| let path_str = path.to_string_lossy(); |
| |
| // If the path actually exists, return true. This allows for |
| // staging test data. |
| path.exists() || |
| |
| // if is_dir returns true, the directory needs to exist as well. |
| (is_dir(path) &&!path_str.ends_with("no-extension")) || |
| |
| // markdown files exist with a couple exceptions. |
| path_str.ends_with(".md") && |
| (!path_str.ends_with("no_readme/README.md") && |
| !path_str.ends_with("unused/README.md") && |
| !path_str.ends_with("missing.md" ) && |
| !path_str.ends_with("no-extension") ) || |
| |
| // OWNERS file exists. |
| path.ends_with("OWNERS") |
| } |
| |
| pub fn is_dir(path: &Path) -> bool { |
| // If the path is actually a directory, return true. This allows for |
| // staged test data. |
| path.is_dir() || |
| |
| // Paths with no extension are directories, except OWNERS. |
| ( path.extension().is_none() && !path.ends_with("OWNERS")) |
| } |
| } |
| |
| /// Check the markdown documentation using a variety of checks. |
| #[derive(Debug, FromArgs)] |
| pub struct DocCheckerArgs { |
| /// path to the root of the checkout of the project. |
| #[argh(option, default = r#"PathBuf::from(".")"#)] |
| pub root: PathBuf, |
| |
| /// name of project to check, defaults to fuchsia. |
| #[argh(option, default = r#"String::from("fuchsia")"#)] |
| pub project: String, |
| |
| /// (Experimental) Name of the folder inside the project |
| /// which contains documents to check. Defaults to 'docs'. |
| #[argh(option, default = r#"PathBuf::from("docs")"#)] |
| pub docs_folder: PathBuf, |
| |
| /// do not resolve http(s) links |
| #[argh(switch)] |
| pub local_links_only: bool, |
| |
| /// output in JSON format |
| #[argh(switch)] |
| pub json: bool, |
| |
| /// allow links to fuchsia-src. Usually links to |
| /// fuchsia-src should be written as file paths |
| /// to /docs. |
| #[argh(switch)] |
| pub allow_fuchsia_src_links: bool, |
| } |
| |
| #[fuchsia::main] |
| async fn main() -> Result<()> { |
| let mut opt: DocCheckerArgs = argh::from_env(); |
| |
| // Canonicalize the root directory so the rest of the code can rely on |
| // the root directory existing and being a normalized path. |
| opt.root = |
| opt.root.canonicalize().context(format!("invalid root dir for source: {:?} ", opt.root))?; |
| |
| if let Some(mut errors) = do_main(&opt).await? { |
| // Output the result |
| let mut error_count = 0; |
| let mut warning_count = 0; |
| let mut info_count = 0; |
| errors.sort(); |
| if opt.json { |
| println!("{}", serde_json::to_string(&errors)?); |
| if errors.iter().any(|e| e.level == ErrorLevel::Error) { |
| std::process::exit(1); |
| } |
| } else { |
| for e in &errors { |
| match e.level { |
| ErrorLevel::Info => info_count += 1, |
| ErrorLevel::Warning => warning_count += 1, |
| ErrorLevel::Error => error_count += 1, |
| } |
| println!("{}\n", e); |
| } |
| // Only bail if there are errors, warnings should return OK. |
| if error_count > 0 { |
| bail!( |
| "Found {} errors, {} warnings, {} info.", |
| error_count, |
| warning_count, |
| info_count |
| ) |
| } else { |
| println!( |
| "Found {} errors, {} warnings, {} info.", |
| error_count, warning_count, info_count |
| ) |
| } |
| } |
| } else { |
| if opt.json { |
| println!("[]"); |
| } else { |
| println!("No errors found"); |
| } |
| } |
| Ok(()) |
| } |
| |
| /// The actual main function. It is refactored like this to make it easier |
| /// to run it in a unit test. |
| async fn do_main(opt: &DocCheckerArgs) -> Result<Option<Vec<DocCheckError>>> { |
| let root_dir = &opt.root; |
| let docs_project = &opt.project; |
| let docs_dir = root_dir.join(&opt.docs_folder); |
| |
| eprintln!("Checking Project {} {:?}.", docs_project, docs_dir); |
| |
| // Find all the markdown in the docs folder. |
| let pattern = format!("{}/**/*.md", docs_dir.to_string_lossy()); |
| let markdown_files: Vec<PathBuf> = glob(&pattern)? |
| // Keep only non-error results, mapping to Option<PathBuf> |
| .filter_map(|p| p.ok()) |
| // Keep paths with file names, mapped to str& |
| // and rop the hidden files that macs sometime make. |
| .filter_map(|p| { |
| if let Some(name) = p.file_name()?.to_str() { |
| if !name.starts_with("._") { |
| Some(p) |
| } else { |
| None |
| } |
| } else { |
| None |
| } |
| }) |
| .collect(); |
| |
| // Find all the .yaml files. |
| let yaml_pattern = format!("{}/**/*.yaml", docs_dir.to_string_lossy()); |
| let yaml_files: Vec<PathBuf> = glob(&yaml_pattern)?.filter_map(|p| p.ok()).collect(); |
| |
| eprintln!( |
| "Checking {} markdown files and {} yaml files", |
| markdown_files.len(), |
| yaml_files.len() |
| ); |
| |
| /* |
| Doc checking is broken into a couple major phases. |
| |
| 1. Checks are registered from the modules that have structs that implement the DocCheck trait. |
| 2. Each markdown file is parsed into a stream of Elements. Each element is passed to each registered checker. |
| 3. After all the markdown files are parsed, the post-check check is called on each checker. This allows |
| checkers to perform cross-file checks and checks that used data collected from the individual documents. |
| 4. Each yaml file is checked for each yaml checker registered. |
| 5. After all the yaml is checked, the post-check check is called. |
| 6. All the errors are collected and returned. |
| */ |
| let mut markdown_checks: Vec<Box<dyn DocCheck>> = vec![]; |
| let mut errors: Vec<DocCheckError> = vec![]; |
| |
| let mut checks = link_checker::register_markdown_checks(&opt)?; |
| for c in checks { |
| markdown_checks.push(c); |
| } |
| |
| checks = include_checker::register_markdown_checks(&opt)?; |
| for c in checks { |
| markdown_checks.push(c); |
| } |
| |
| let mut yaml_checks = yaml::register_yaml_checks(&opt)?; |
| |
| let markdown_errors: Vec<DocCheckError> = |
| check_markdown(&markdown_files, &mut markdown_checks)?; |
| errors.extend(markdown_errors); |
| |
| let yaml_errors = check_yaml(&yaml_files, &mut yaml_checks)?; |
| errors.extend(yaml_errors); |
| |
| // Post checks |
| for c in yaml_checks { |
| match c.post_check(&markdown_files, &yaml_files).await { |
| Ok(Some(check_errors)) => errors.extend(check_errors), |
| Ok(None) => {} |
| Err(e) => errors.push(DocCheckError::new_error( |
| 0, |
| "".into(), |
| &format!("Error {} running check: {} ", e, c.name()), |
| )), |
| } |
| } |
| for c in markdown_checks { |
| match c.post_check().await { |
| Ok(Some(check_errors)) => errors.extend(check_errors), |
| Ok(None) => {} |
| Err(e) => errors.push(DocCheckError::new_error( |
| 0, |
| "".into(), |
| &format!("Error {} running check: {} ", e, c.name()), |
| )), |
| } |
| } |
| |
| let result = if errors.is_empty() { None } else { Some(errors) }; |
| Ok(result) |
| } |
| |
| /// Given the list of markdown files to check, iterate over each check, collecting any errors. |
| pub fn check_markdown<'a>( |
| files: &[PathBuf], |
| checks: &'a mut [Box<dyn DocCheck + 'static>], |
| ) -> Result<Vec<DocCheckError>> { |
| let mut errors: Vec<DocCheckError> = vec![]; |
| |
| for mdfile in files { |
| let mdcontent = fs::read_to_string(mdfile).expect("Unable to read file"); |
| let doc_context = DocContext::new_with_checks(mdfile.clone(), &mdcontent); |
| |
| for element in doc_context { |
| for c in &mut *checks { |
| match c.check(&element) { |
| Ok(Some(check_errors)) => errors.extend(check_errors), |
| Ok(None) => {} |
| Err(e) => errors.push(DocCheckError::new_error( |
| 0, |
| "".into(), |
| &format!("Error {} running check: {} ", e, c.name()), |
| )), |
| } |
| } |
| } |
| } |
| Ok(errors) |
| } |
| |
| fn check_yaml<'a>( |
| yaml_files: &[PathBuf], |
| checks: &'a mut [Box<dyn DocYamlCheck + 'static>], |
| ) -> Result<Vec<DocCheckError>> { |
| let mut errors: Vec<DocCheckError> = vec![]; |
| |
| for yaml_file in yaml_files { |
| let f = File::open(yaml_file)?; |
| let val: Value = match serde_yaml::from_reader(BufReader::new(f)) { |
| Ok(v) => v, |
| Err(e) => bail!("Error parsing {:?}: {}", yaml_file, e), |
| }; |
| for c in &mut *checks { |
| match c.check(yaml_file, &val) { |
| Ok(Some(check_errors)) => errors.extend(check_errors), |
| Ok(None) => {} |
| Err(e) => errors.push(DocCheckError::new_error( |
| 0, |
| "".into(), |
| &format!("Error {} running check: {} ", e, c.name()), |
| )), |
| } |
| } |
| } |
| |
| Ok(errors) |
| } |
| |
| #[cfg(test)] |
| mod test { |
| |
| use super::*; |
| use std::env; |
| |
| #[fuchsia_async::run_singlethreaded(test)] |
| async fn test_project_test() -> Result<()> { |
| let opt = DocCheckerArgs { |
| root: PathBuf::from("doc_checker_test_data"), |
| project: "fuchsia".to_string(), |
| docs_folder: PathBuf::from("docs"), |
| local_links_only: true, |
| json: false, |
| allow_fuchsia_src_links: false, |
| }; |
| |
| // Set the current directory to the executable dir so the relative test paths WAI. |
| env::set_current_dir(env::current_exe()?.parent().unwrap_or(&PathBuf::from(".")))?; |
| |
| let expected: Vec<DocCheckError> = vec![ |
| DocCheckError::new_error(10, PathBuf::from("doc_checker_test_data/docs/README.md"), |
| "in-tree link to /docs/missing.md could not be found at \"doc_checker_test_data/docs/missing.md\""), |
| DocCheckError::new_error(12, PathBuf::from("doc_checker_test_data/docs/README.md"), |
| "Should not link to https://fuchsia.dev/fuchsia-src/path.md via https, use relative filepath"), |
| DocCheckError::new_error(21, PathBuf::from("doc_checker_test_data/docs/README.md"), |
| "Obsolete or invalid project garnet: https://fuchsia.googlesource.com/garnet/+/refs/heads/main/README.md"), |
| DocCheckError::new_error(23,PathBuf::from("doc_checker_test_data/docs/README.md"), |
| "Cannot normalize /docs/../../README.md, references parent beyond root."), |
| DocCheckError::new_error_helpful(30,PathBuf::from("doc_checker_test_data/docs/README.md"), |
| "in-tree link to /docs/no-extension could not be found at \"doc_checker_test_data/docs/no-extension\"", "\"no-extension.md\""), |
| DocCheckError::new_error(6, PathBuf::from("doc_checker_test_data/docs/_common/_included.md"), |
| "in-tree link to /docs/missing.md could not be found at \"doc_checker_test_data/docs/missing.md\""), |
| DocCheckError::new_error(9, PathBuf::from("doc_checker_test_data/docs/include_here.md"), |
| "Included markdown file \"doc_checker_test_data/docs/_common/missing.md\" not found."), |
| DocCheckError::new_error(13, PathBuf::from("doc_checker_test_data/docs/include_here.md"), |
| "Included markdown file \"/docs/_common/_included.md\" must be a relative path."), |
| DocCheckError::new_error(2, PathBuf::from("doc_checker_test_data/docs/no_readme/details.md"), |
| "in-tree link to /docs/no_readme could not be found at \"doc_checker_test_data/docs/no_readme\" or \"doc_checker_test_data/docs/no_readme/README.md\""), |
| DocCheckError::new_error(4,PathBuf::from("doc_checker_test_data/docs/path.md"), |
| "in-tree link to /docs/missing-image.png could not be found at \"doc_checker_test_data/docs/missing-image.png\""), |
| // There are 3 instances of [i] on the same line. |
| DocCheckError::new_error_helpful(17, PathBuf::from("doc_checker_test_data/docs/path.md"), |
| "unescaped [i] not treating this as a shortcut link.", "escape brackets \\[i\\] or make a link [i](/docs/i)"), |
| DocCheckError::new_error_helpful(17, PathBuf::from("doc_checker_test_data/docs/path.md"), |
| "unescaped [i] not treating this as a shortcut link.", "escape brackets \\[i\\] or make a link [i](/docs/i)"), |
| DocCheckError::new_error_helpful(17, PathBuf::from("doc_checker_test_data/docs/path.md"), |
| "unescaped [i] not treating this as a shortcut link.", "escape brackets \\[i\\] or make a link [i](/docs/i)"), |
| DocCheckError::new_error(6, PathBuf::from("doc_checker_test_data/docs/second.md"), |
| "Invalid link http://{}.com/markdown : invalid uri character"), |
| DocCheckError::new_error(10, PathBuf::from("doc_checker_test_data/docs/second.md"), |
| "Cannot normalize /docs/../../missing.md, references parent beyond root."), |
| DocCheckError::new_error(1, PathBuf::from("doc_checker_test_data/docs/unused/_toc.yaml"), |
| "in-tree link to /docs/unused could not be found at \"doc_checker_test_data/docs/unused\" or \"doc_checker_test_data/docs/unused/README.md\""), |
| DocCheckError::new_error(0, PathBuf::from("doc_checker_test_data/docs/_toc.yaml"), |
| "Cannot find file \"doc_checker_test_data/docs/missing/_toc.yaml\" included in \"doc_checker_test_data/docs/_toc.yaml\""), |
| DocCheckError::new_error(0, PathBuf::from("doc_checker_test_data/docs/cycle/_toc.yaml"), |
| "YAML files cannot include themselves \"doc_checker_test_data/docs/cycle/_toc.yaml\""), |
| DocCheckError::new_error(0, PathBuf::from("doc_checker_test_data/docs/unreachable.md"), |
| "File not referenced in any _toc.yaml files."), |
| DocCheckError::new_error(0, PathBuf::from("doc_checker_test_data/docs/unused/_toc.yaml"), |
| "File not reachable via _toc include references."), |
| ]; |
| |
| if let Some(actual_errors) = do_main(&opt).await? { |
| let mut expected_iter = expected.iter(); |
| for actual in actual_errors { |
| if let Some(expected) = expected_iter.next() { |
| assert_eq!(&actual, expected); |
| } else { |
| bail!("Unexpected error: {:?}", actual); |
| } |
| } |
| let expected_errors: Vec<&DocCheckError> = expected_iter.collect(); |
| // Should be no other expected errors |
| if !expected_errors.is_empty() { |
| bail!("Expected at least one more error. Missing error(s): {:?}", expected_errors); |
| } |
| } |
| Ok(()) |
| } |
| } |