blob: b32df6b8fd24eaec1cbe3d0a0a375cd0c83f4023 [file] [log] [blame]
// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! yaml checks the yaml files that are part of the //docs publishing process
//! for correctness.
use {
self::toc_checker::Toc,
crate::{
link_checker::{
check_external_links, do_check_link, do_in_tree_check, is_intree_link, LinkReference,
PUBLISHED_DOCS_HOST,
},
DocCheckError, DocCheckerArgs, DocLine, DocYamlCheck,
},
anyhow::Result,
async_trait::async_trait,
serde::{de::DeserializeOwned, Deserialize},
serde_yaml::{Mapping, Value},
std::{
collections::{HashMap, HashSet},
ffi::OsStr,
path::{self, Path, PathBuf},
},
};
mod toc_checker;
// Alias `path_helper` to `mock_path_helper_module` under `cfg(test)` and to
// `path_helper_module` otherwise, so the rest of this file refers to a single
// name in both build configurations.
cfg_if::cfg_if! {
if #[cfg(test)] {
use crate::mock_path_helper_module as path_helper;
} else {
use crate::path_helper_module as path_helper;
}
}
/// Schema of one list entry in `contribute/governance/areas/_areas.yaml`,
/// as parsed by `check_areas`.
///
/// Fields typed as `Option` may be absent from the yaml; all others are
/// required for the entry to deserialize.
#[derive(Deserialize, PartialEq, Debug)]
struct AreaEntry {
name: String,
api_primary: String,
api_secondary: String,
description: Option<String>,
examples: Option<Vec<Mapping>>,
}
/// Top-level schema of `_deprecated-docs.yaml`, as parsed by
/// `check_deprecated_docs`: a mapping with an `included` list of
/// `from`/`to` pairs.
#[derive(Deserialize, PartialEq, Debug)]
struct Deprecations {
included: Vec<FromTo>,
}
/// Schema of one list entry in `_drivers_epitaphs.yaml`, as parsed by
/// `check_drivers_epitaphs`. Only `areas` is optional.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct DriverEpitaph {
short_description: String,
deletion_reason: String,
gerrit_change_id: String,
available_in_git: String,
areas: Option<Vec<String>>,
path: String,
}
/// Top-level schema of `_eng_council.yaml`, as parsed by
/// `check_eng_council`: a mapping with a `members` list of strings.
#[derive(Deserialize, PartialEq, Debug)]
struct EngCouncil {
members: Vec<String>,
}
/// A `from`/`to` pair of strings. Used as the element type of
/// `Deprecations::included` and `Redirects::redirects`.
#[derive(Deserialize, Eq, PartialEq, Debug)]
pub struct FromTo {
pub from: String,
pub to: String,
}
/// Schema of one list entry in `_glossary.yaml`, as parsed by
/// `check_glossary`. `full_description` and `see_also` are optional.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct GlossaryTerm {
term: String,
short_description: String,
full_description: Option<String>,
see_also: Option<Vec<String>>,
related_guides: Vec<String>,
area: Vec<String>,
}
/// Schema of one element of the `guides` list in `Metadata`.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct GuideEntry {
// `type` is a Rust keyword, so the yaml key `type` is accepted through an
// alias and stored as `entry_type`.
#[serde(alias = "type")]
entry_type: String,
product: String,
board: String,
method: String,
host: String,
url: String,
title: String,
}
/// Top-level schema of `_metadata.yaml`, as parsed by `check_metadata`.
/// Every field is required.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct Metadata {
descriptions: Mapping,
columns: Vec<String>,
types: Vec<String>,
products: Vec<String>,
boards: Vec<String>,
methods: Vec<String>,
hosts: Vec<String>,
guides: Vec<GuideEntry>,
}
/// Schema of one list entry in `_problems.yaml`, as parsed by
/// `check_problems`.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct ProblemEntry {
key: String,
use_case: String,
description: String,
// Accepts the hyphenated key `related-problems` in addition to the
// field's own name.
#[serde(alias = "related-problems")]
related_problems: Vec<String>,
}
/// Top-level schema of `_redirects.yaml`, as parsed by `check_redirects`:
/// a mapping with a `redirects` list of `from`/`to` pairs.
#[derive(Deserialize, PartialEq, Debug)]
struct Redirects {
redirects: Vec<FromTo>,
}
/// Schema of one list entry in `_rfcs.yaml`, as parsed by `check_rfcs`.
/// Every field is required.
#[derive(Deserialize, PartialEq, Debug)]
struct RfcEntry {
name: String,
title: String,
short_description: String,
authors: Vec<String>,
file: String,
area: Vec<String>,
issue: Vec<String>,
gerrit_change_id: Vec<String>,
status: String,
reviewers: Vec<String>,
submitted: String,
reviewed: String,
}
/// Schema of one list entry in `_roadmap.yaml`, as parsed by
/// `check_roadmap`. Every field is required.
#[derive(Deserialize, PartialEq, Debug)]
struct RoadmapEntry {
workstream: String,
area: String,
category: Vec<String>,
}
/// Schema of one list entry in `_supported_sys_config.yaml`, as parsed by
/// `check_supported_sys_config`. `ram`, `storage` and `manufacturer_link`
/// are optional.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct SysConfigEntry {
name: String,
description: String,
architecture: String,
// Accepts the upper-case yaml key `RAM` in addition to `ram`.
#[serde(alias = "RAM")]
ram: Option<String>,
storage: Option<String>,
manufacturer_link: Option<String>,
board_driver_location: String,
}
/// Schema of one list entry in `_tools.yaml`, as parsed by `check_tools`.
/// Only `related` is optional.
#[derive(Deserialize, Debug)]
// Dead code is used here so the names
// of the fields can be used by Deserialize
// even though there is no reading of the fields.
#[allow(dead_code)]
struct ToolsEntry {
name: String,
team: String,
links: Mapping,
description: String,
related: Option<Vec<String>>,
}
#[derive(Debug, Clone)]
// Represents a yaml file included in another yaml file.
// The container is the file with the reference to the
// included_file.
//
// `post_check` seeds its traversal with an entry whose container and
// included_file are both the root `docs/_toc.yaml`.
pub(crate) struct IncludedYaml {
// File containing the include reference.
pub(crate) container: PathBuf,
// File that the container refers to.
pub(crate) included_file: PathBuf,
}
/// Checker for the yaml files of the docs tree; implements `DocYamlCheck`.
/// Constructed from `DocCheckerArgs` by `register_yaml_checks`.
#[derive(Debug)]
pub(crate) struct YamlChecker {
// Root directory that in-tree paths are joined onto (`opt.root`).
root_dir: PathBuf,
// Docs folder, passed through to the toc checker (`opt.docs_folder`).
docs_folder: PathBuf,
// Project name, passed through to the toc checker (`opt.project`).
project: String,
// When true, external links collected from toc files are verified in
// `post_check`. Set to the negation of `opt.local_links_only`.
check_external_links: bool,
// Passed through to the toc checker (`opt.allow_fuchsia_src_links`).
allow_fuchsia_src_links: bool,
}
#[async_trait]
impl DocYamlCheck for YamlChecker {
fn name(&self) -> &str {
"DocYamlCheck"
}
fn check<'a>(
&mut self,
filename: &Path,
yaml_value: &serde_yaml::Value,
) -> Result<Option<Vec<DocCheckError>>> {
if let Some(yaml_name) = filename.file_name() {
let result = match yaml_name.to_str() {
Some("_areas.yaml") => check_areas(filename, yaml_value),
Some("_deprecated-docs.yaml") => check_deprecated_docs(filename, yaml_value),
Some("_drivers_areas.yaml") => check_drivers_areas(filename, yaml_value),
Some("_drivers_epitaphs.yaml") => check_drivers_epitaphs(filename, yaml_value),
Some("_eng_council.yaml") => check_eng_council(filename, yaml_value),
Some("_glossary.yaml") => check_glossary(filename, yaml_value),
Some("_metadata.yaml") => check_metadata(filename, yaml_value),
Some("_problems.yaml") => check_problems(filename, yaml_value),
Some("_redirects.yaml") => check_redirects(filename, yaml_value),
Some("_rfcs.yaml") => check_rfcs(filename, yaml_value),
Some("_roadmap.yaml") => check_roadmap(filename, yaml_value),
Some("_supported_cpu_architecture.yaml") => {
check_supported_cpu_architecture(filename, yaml_value)
}
Some("_supported_sys_config.yaml") => {
check_supported_sys_config(filename, yaml_value)
}
Some("_toc.yaml") => toc_checker::check_toc(
&self.root_dir,
&self.docs_folder,
&self.project,
filename,
yaml_value,
self.allow_fuchsia_src_links,
),
Some("_tools.yaml") => check_tools(filename, yaml_value),
Some(name) => todo!("Need to handle {} ({:?})", name, filename),
_ => panic!("No str avail for {:?}", filename),
};
Ok(result)
} else {
Ok(None)
}
}
async fn post_check(
&self,
_markdown_files: &[PathBuf],
_yaml_files: &[PathBuf],
) -> Result<Option<Vec<DocCheckError>>> {
let mut yaml_file_set: HashSet<&PathBuf> = HashSet::from_iter(_yaml_files.iter());
let mut visited: HashMap<PathBuf, IncludedYaml> = HashMap::new();
let mut markdown_file_set: HashSet<&PathBuf> = HashSet::from_iter(_markdown_files.iter());
let mut errors = vec![];
let mut external_links = vec![];
// Some special paths that are not in the //docs dir that need to be added
let code_of_conduct_md = self.root_dir.join("CODE_OF_CONDUCT.md");
markdown_file_set.insert(&code_of_conduct_md);
let contrib_md = self.root_dir.join("CONTRIBUTING.md");
markdown_file_set.insert(&contrib_md);
// Start with //docs/_toc.yaml
let mut toc_stack: Vec<IncludedYaml> = vec![IncludedYaml {
container: self.root_dir.join("docs/_toc.yaml").into(),
included_file: self.root_dir.join("docs/_toc.yaml").into(),
}];
while let Some(current_yaml) = toc_stack.pop() {
if let Some(yaml_doc) = yaml_file_set.take(&current_yaml.included_file) {
visited.insert(yaml_doc.clone(), current_yaml.clone());
let toc = Toc::from(yaml_doc)?;
// remove paths to markdown
if let Some(path_list) = toc.get_paths() {
for p in path_list {
if is_external_path(&p) {
if self.check_external_links {
if p.starts_with("/reference") {
external_links.push(LinkReference {
link: format!("https://{}{}", PUBLISHED_DOCS_HOST, p),
location: DocLine {
line_num: 0,
file_name: current_yaml.included_file.clone(),
},
});
} else if p.starts_with("https://") || p.starts_with("http://") {
external_links.push(LinkReference {
link: p.to_string(),
location: DocLine {
line_num: 0,
file_name: current_yaml.included_file.clone(),
},
});
} else if p.starts_with("//") {
external_links.push(LinkReference {
link: format!("https:{}", p),
location: DocLine {
line_num: 0,
file_name: current_yaml.included_file.clone(),
},
});
}
}
continue;
} else {
let rel_path = p.strip_prefix('/').unwrap_or(p.as_str());
let mut file_path = self.root_dir.join(rel_path);
if path_helper::is_dir(&file_path) {
file_path.push("README.md");
}
if markdown_file_set.take(&file_path).is_none()
&& !visited.contains_key(&file_path)
{
errors.push(DocCheckError::new_error(
0,
yaml_doc.clone(),
&format!("Reference to missing file: {}", p),
));
} else {
visited.insert(file_path, current_yaml.clone());
}
}
}
}
// follow include
if let Some(includes) = toc.get_includes() {
// All includes are /docs/... so just append the root.
let additional_paths = includes
.iter()
//Ignoring yaml included from /reference.
.filter(|p| !p.starts_with("/reference"))
.map(|p| self.root_dir.join(p.strip_prefix('/').unwrap_or(p.as_str())))
.filter(|p| {
if p == &current_yaml.included_file {
errors.push(DocCheckError::new_error(
0,
p.clone(),
&format!("YAML files cannot include themselves {p:?}"),
));
false
} else {
true
}
});
toc_stack.extend(
additional_paths.map(|f| IncludedYaml {
container: yaml_doc.clone(),
included_file: f,
}),
);
}
} else if !visited.contains_key(&current_yaml.included_file) {
errors.push(DocCheckError::new_error(
0,
current_yaml.container.clone(),
&format!(
"Cannot find file {:?} included in {:?}",
&current_yaml.included_file, &current_yaml.container
),
));
}
}
markdown_file_set
.iter()
.filter(|f| **f != &code_of_conduct_md && **f != &contrib_md)
.filter(|p| {
if let Some(name) = p.file_name() {
name != "navbar.md" && !name.to_str().unwrap_or_default().starts_with('_')
} else {
false
}
})
.filter(|p| {
!p.components().any(|c| c == path::Component::Normal(OsStr::new("_common")))
})
.filter(|p| !p.ends_with("gen/build_arguments.md"))
.copied()
.for_each(|f| {
errors.push(DocCheckError::new_error(
0,
f.clone(),
"File not referenced in any _toc.yaml files.",
));
});
yaml_file_set.iter().filter(|f| f.ends_with("_toc.yaml")).for_each(|&f| {
errors.push(DocCheckError::new_error(
0,
f.clone(),
"File not reachable via _toc include references.",
))
});
if self.check_external_links {
if let Some(link_errors) = check_external_links(&external_links).await {
for e in link_errors {
errors.push(e);
}
}
}
if errors.is_empty() {
Ok(None)
} else {
Ok(Some(errors))
}
}
}
/// Returns true when `p` is handled as an external link rather than a file in
/// the docs tree: `/reference` paths, `http(s)://` URLs and scheme-relative
/// `//` URLs.
fn is_external_path(p: &str) -> bool {
    // "/reference" is listed because reference docs are treated as external.
    const EXTERNAL_PREFIXES: [&str; 4] = ["/reference", "https://", "http://", "//"];
    EXTERNAL_PREFIXES.iter().any(|prefix| p.starts_with(*prefix))
}
/// Validates a `path` value read from a yaml file.
///
/// The checks run in order, and the first one that produces an error wins:
/// 1. the generic link check (`do_check_link`),
/// 2. existence of the two root-level files `/CONTRIBUTING.md` and
///    `/CODE_OF_CONDUCT.md`,
/// 3. in-tree paths must be under `/<docs_folder>` (or `/reference`) and pass
///    `do_in_tree_check`,
/// 4. anything else must be an external path.
///
/// Returns `None` when the path is acceptable, otherwise an error located at
/// `doc_line`.
fn check_path(
    doc_line: &DocLine,
    root_path: &Path,
    docs_folder: &Path,
    project: &str,
    path: &str,
    allow_fuchsia_src_links: bool,
) -> Option<DocCheckError> {
    // Builds an error positioned at the line being checked.
    let error_here = |message: &str| {
        DocCheckError::new_error(doc_line.line_num, doc_line.file_name.clone(), message)
    };

    match do_check_link(doc_line, path, project, allow_fuchsia_src_links) {
        Ok(None) => {}
        Ok(Some(link_error)) => return Some(link_error),
        Err(e) => return Some(error_here(&e.to_string())),
    }

    // These files are in the root of the project, not in the docs directory, so they need special
    // treatment.
    if path == "/CONTRIBUTING.md" || path == "/CODE_OF_CONDUCT.md" {
        let filepath = root_path.join(path.strip_prefix('/').unwrap_or(path));
        return if path_helper::exists(&filepath) {
            None
        } else {
            Some(error_here(&format!("File: {:?} not found.", filepath)))
        };
    }

    let root_dir = root_path.display().to_string();
    let in_tree_path = match is_intree_link(project, &root_dir, docs_folder, path) {
        Ok(Some(resolved)) => resolved,
        // Accept external links.
        Ok(None) if is_external_path(path) => return None,
        Ok(None) => return Some(error_here(&format!("invalid path {}", path))),
        Err(e) => {
            return Some(error_here(&format!("Error checking path {}: {}", path, e)));
        }
    };

    // Since this is a table of contents, all the entries need to be in the
    // docs_folder, except /reference, which is a special case.
    if in_tree_path.starts_with(PathBuf::from("/").join(docs_folder)) {
        do_in_tree_check(doc_line, root_path, docs_folder, path, &in_tree_path)
    } else if in_tree_path.starts_with("/reference") {
        None
    } else {
        Some(error_here(&format!(
            "Invalid path {}. Path must be in /docs (checked: {:?}",
            path, in_tree_path
        )))
    }
}
/// Checks an `_areas.yaml` file.
///
/// The governance copy (`contribute/governance/areas/_areas.yaml`) is a list
/// of `AreaEntry` items; every other `_areas.yaml` is a plain list of strings.
/// Returns the parse errors, if any.
fn check_areas(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064921): Align _areas.yaml on same schema.
    //TODO(https://fxbug.dev/42064922): other checks for AreaEntry?
    let is_governance_areas = filename.ends_with("contribute/governance/areas/_areas.yaml");
    if is_governance_areas {
        parse_entries::<AreaEntry>(filename, yaml_value).1
    } else {
        parse_entries::<String>(filename, yaml_value).1
    }
}
/// Checks that `_deprecated-docs.yaml` deserializes as `Deprecations`.
/// Returns a single error describing the failure, or `None` on success.
fn check_deprecated_docs(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064923): Add a check that the to: doc exists.
    serde_yaml::from_value::<Deprecations>(yaml_value.clone()).err().map(|parse_error| {
        vec![DocCheckError::new_error(
            1,
            filename.to_path_buf(),
            &format!("invalid structure {}", parse_error),
        )]
    })
}
/// Checks that `_drivers_areas.yaml` is a list of strings.
/// Returns a single error describing the failure, or `None` on success.
fn check_drivers_areas(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064921): Align on common _areas.yaml structure
    let parse_error = serde_yaml::from_value::<Vec<String>>(yaml_value.clone()).err()?;
    Some(vec![DocCheckError::new_error(
        1,
        filename.to_path_buf(),
        &format!("invalid structure for _drivers_areas {}. Data: {:?}", parse_error, yaml_value),
    )])
}
/// Checks that `_drivers_epitaphs.yaml` is a non-empty list of
/// `DriverEpitaph` entries. Returns the parse errors, if any.
fn check_drivers_epitaphs(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064924): other checks for DriverEpitaph?
    parse_entries::<DriverEpitaph>(filename, yaml_value).1
}
/// Checks that `_eng_council.yaml` deserializes as `EngCouncil`.
/// Returns a single error describing the failure, or `None` on success.
fn check_eng_council(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    if let Err(parse_error) = serde_yaml::from_value::<EngCouncil>(yaml_value.clone()) {
        let message =
            format!("invalid structure for EngCouncil {}. Found {:?}", parse_error, yaml_value);
        return Some(vec![DocCheckError::new_error(1, filename.to_path_buf(), &message)]);
    }
    None
}
/// Checks that `_glossary.yaml` is a non-empty list of `GlossaryTerm`
/// entries. Returns the parse errors, if any.
fn check_glossary(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064926): other checks for GlossaryTerm?
    parse_entries::<GlossaryTerm>(filename, yaml_value).1
}
/// Checks that `_metadata.yaml` deserializes as `Metadata`.
/// Returns a single error describing the failure, or `None` on success.
fn check_metadata(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064928): Add checks for metadata.
    serde_yaml::from_value::<Metadata>(yaml_value.clone()).err().map(|parse_error| {
        vec![DocCheckError::new_error(
            1,
            filename.to_path_buf(),
            &format!("invalid structure for _metadata {}. Data: {:?}", parse_error, yaml_value),
        )]
    })
}
/// Checks that `_problems.yaml` is a non-empty list of `ProblemEntry`
/// entries. Returns the parse errors, if any.
fn check_problems(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064929): other checks for ProblemEntry?
    parse_entries::<ProblemEntry>(filename, yaml_value).1
}
/// Checks that `_redirects.yaml` deserializes as `Redirects`.
/// Returns a single error describing the failure, or `None` on success.
fn check_redirects(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064930): add validation to redirects.
    let parse_error = serde_yaml::from_value::<Redirects>(yaml_value.clone()).err()?;
    Some(vec![DocCheckError::new_error(
        1,
        filename.to_path_buf(),
        &format!("invalid structure {}", parse_error),
    )])
}
/// Checks that `_rfcs.yaml` is a non-empty list of `RfcEntry` entries.
/// Returns the parse errors, if any.
fn check_rfcs(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064931): other checks for RfcEntry?
    parse_entries::<RfcEntry>(filename, yaml_value).1
}
/// Checks that `_roadmap.yaml` is a non-empty list of `RoadmapEntry`
/// entries. Returns the parse errors, if any.
fn check_roadmap(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064932): other checks for RoadmapEntry?
    parse_entries::<RoadmapEntry>(filename, yaml_value).1
}
/// Checks that `_supported_cpu_architecture.yaml` is a list of strings.
/// Returns a single error describing the failure, or `None` on success.
fn check_supported_cpu_architecture(
    filename: &Path,
    yaml_value: &Value,
) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064933): Add validation
    if let Err(parse_error) = serde_yaml::from_value::<Vec<String>>(yaml_value.clone()) {
        let message = format!(
            "invalid structure for _supported_cpu_architecture {}. Data: {:?}",
            parse_error, yaml_value
        );
        return Some(vec![DocCheckError::new_error(1, filename.to_path_buf(), &message)]);
    }
    None
}
/// Checks that `_supported_sys_config.yaml` is a non-empty list of
/// `SysConfigEntry` entries. Returns the parse errors, if any.
fn check_supported_sys_config(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064934): other checks for SysConfigEntry?
    parse_entries::<SysConfigEntry>(filename, yaml_value).1
}
/// Checks that `_tools.yaml` is a non-empty list of `ToolsEntry` entries.
/// Returns the parse errors, if any.
fn check_tools(filename: &Path, yaml_value: &Value) -> Option<Vec<DocCheckError>> {
    //TODO(https://fxbug.dev/42064935): other checks for ToolsEntry?
    parse_entries::<ToolsEntry>(filename, yaml_value).1
}
/// Parses `yaml_value` as a sequence of `T` elements.
///
/// Returns `(items, errors)`:
/// - `items` holds the elements that deserialized successfully, or `None`
///   when there are none;
/// - `errors` holds one error per element that failed to deserialize, or a
///   single error when `yaml_value` is not a sequence or is an empty
///   sequence; `None` when everything parsed.
///
/// All errors are reported against line 1 of `filename`.
fn parse_entries<T: DeserializeOwned>(
    filename: &Path,
    yaml_value: &Value,
) -> (Option<Vec<T>>, Option<Vec<DocCheckError>>) {
    // Result used when the file as a whole is unusable.
    let whole_file_error = |message: String| {
        (
            None::<Vec<T>>,
            Some(vec![DocCheckError::new_error(1, filename.to_path_buf(), &message)]),
        )
    };

    let entries = match yaml_value.as_sequence() {
        None => {
            return whole_file_error(format!(
                "unable to parse sequence for {:?} file, expected Sequence, got {:?}",
                filename, yaml_value
            ));
        }
        Some(sequence) if sequence.is_empty() => {
            return whole_file_error(format!(
                "unexpected empty list for {:?} file, got {:?}",
                filename, yaml_value
            ));
        }
        Some(sequence) => sequence,
    };

    let mut parsed: Vec<T> = Vec::new();
    let mut failures: Vec<DocCheckError> = Vec::new();
    for entry in entries {
        match serde_yaml::from_value::<T>(entry.clone()) {
            Ok(value) => parsed.push(value),
            Err(e) => failures.push(DocCheckError::new_error(
                1,
                filename.to_path_buf(),
                &format!("invalid structure for {:?} entry: {}. Data: {:?}", filename, e, entry),
            )),
        }
    }

    // Empty collections are reported as `None` rather than `Some(vec![])`.
    (
        if parsed.is_empty() { None } else { Some(parsed) },
        if failures.is_empty() { None } else { Some(failures) },
    )
}
/// Called from main to register all the checks to perform which are implemented in this module.
///
/// Builds a single `YamlChecker` configured from `opt`. External link
/// checking is enabled unless `opt.local_links_only` is set.
pub fn register_yaml_checks(opt: &DocCheckerArgs) -> Result<Vec<Box<dyn DocYamlCheck>>> {
    let yaml_checker: Box<dyn DocYamlCheck> = Box::new(YamlChecker {
        root_dir: opt.root.clone(),
        docs_folder: opt.docs_folder.clone(),
        project: opt.project.clone(),
        check_external_links: !opt.local_links_only,
        allow_fuchsia_src_links: opt.allow_fuchsia_src_links,
    });
    Ok(vec![yaml_checker])
}
// Unit tests for the yaml checks. Under `cfg(test)` this file's `path_helper`
// alias resolves to `crate::mock_path_helper_module`.
#[cfg(test)]
mod test {
use super::*;
// Runs `check_path` over a table of (path, expected result) pairs.
#[test]
fn test_check_path() -> Result<()> {
let doc_line = &DocLine { line_num: 1, file_name: PathBuf::from("test-check-path") };
let root_path = PathBuf::from("/some/root");
let docs_folder = PathBuf::from("docs");
let project = "fuchsia";
let allow_fuchsia_src_links = false;
// `None` means the path is accepted. The `None` expectations for in-tree
// files presumably depend on what the mock path helper reports as
// existing -- confirm against `mock_path_helper_module`.
let test_data: [(&str, Option<DocCheckError>); 7] = [
("/CONTRIBUTING.md", None),
("/CODE_OF_CONDUCT.md", None),
(
"/README.md",
Some(DocCheckError::new_error(
1,
PathBuf::from("test-check-path"),
"Invalid path /README.md. Path must be in /docs (checked: \"/README.md\"",
)),
),
("https://fuchsia.dev/reference/to/something-else.md", None),
("/docs/are-ok.md", None),
("https://somewhere.com/is-ok", None),
(
"/src/main.cc",
Some(DocCheckError::new_error(
1,
PathBuf::from("test-check-path"),
"Invalid path /src/main.cc. Path must be in /docs (checked: \"/src/main.cc\"",
)),
),
];
for (test_path, expected_result) in test_data {
let actual_result = check_path(
doc_line,
&root_path,
&docs_folder,
project,
test_path,
allow_fuchsia_src_links,
);
assert_eq!(actual_result, expected_result);
}
Ok(())
}
// Checks that a governance `_areas.yaml` entry carrying every `AreaEntry`
// field is accepted without errors.
#[test]
fn test_check_areas() -> Result<()> {
// Test is more complex because of todo
//TODO(https://fxbug.dev/42064921): Align _areas.yaml on same schema.
// The file name must end with the governance path so that `check_areas`
// selects the `AreaEntry` schema rather than the plain string list.
let filename = "/some/docs/contribute/governance/areas/_areas.yaml";
let yaml_value: Value = serde_yaml::from_str(
r#"
- name: 'Area1'
api_primary: 'someone@google.com'
api_secondary: 'someonelese@google.com'
description: |
<p>
This is an area.
</p>
examples:
- fidl: 'fuchsia.docs.samples'
"#,
)?;
assert_eq!(check_areas(&PathBuf::from(filename), &yaml_value), None);
Ok(())
}
}