[intl] Tightens the string naming controls
In an early field test, lucasradaelli@ discovered that `strings_to_fidl`
let an identifier name such as "role_table," go through. Notice the
extra unwanted comma at the end.
While `strings_to_fidl` doesn't really have a preference, the FIDL file
that is generated based on strings.xml ends up with unallowed syntax.
So, we tighten the behavior to fail fast instead of generating a FIDL
file that won't pass a followup FIDL compilation step.
Example error given below:
```
╰─>$ ./strings_to_fidl --input=$HOME/strings.xml --output=$HOME/strings.xml.out ; cat $HOME/strings.xml.out
Error: while parsing dictionary
Caused by:
0: in state: String { name: "string_name,", text: "text_string" }
1: name is not acceptable: 'string_name,', does not match: ^[_a-zA-Z][_a-zA-Z_0-9]*$
```
Bug: 50720
Change-Id: Id178b3c4af434803ced1251a284c2a43947937c8
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/385229
Commit-Queue: Filip Filmar <fmil@google.com>
Reviewed-by: Konstantin Pozin <kpozin@google.com>
Testability-Review: Filip Filmar <fmil@google.com>
diff --git a/src/lib/intl/strings/BUILD.gn b/src/lib/intl/strings/BUILD.gn
index a665f82..b4daf51 100644
--- a/src/lib/intl/strings/BUILD.gn
+++ b/src/lib/intl/strings/BUILD.gn
@@ -27,9 +27,10 @@
deps = [
"//third_party/rust_crates:anyhow",
"//third_party/rust_crates:handlebars",
+ "//third_party/rust_crates:lazy_static",
+ "//third_party/rust_crates:regex",
"//third_party/rust_crates:serde",
"//third_party/rust_crates:serde_json",
- "//third_party/rust_crates:structopt",
"//third_party/rust_crates:thiserror",
"//third_party/rust_crates:xml-rs",
]
diff --git a/src/lib/intl/strings/src/parser.rs b/src/lib/intl/strings/src/parser.rs
index 67a0b99..fa7da39 100644
--- a/src/lib/intl/strings/src/parser.rs
+++ b/src/lib/intl/strings/src/parser.rs
@@ -6,6 +6,8 @@
use {
anyhow::{anyhow, Context, Error, Result},
+ lazy_static::lazy_static,
+ regex::Regex,
std::collections::BTreeMap,
std::io,
xml::attribute,
@@ -93,6 +95,31 @@
Done,
}
+lazy_static! {
+ // All string names must match this regex string. (This should be fixed up
+ // to match the FIDL naming rules)
+ //
+ // Example:
+ // __hello_WORLD -- acceptable
+ // 0cool -- not acceptable, leading number
+ // добар_дан -- not acceptable, not A-Z.
+ static ref RAW_REGEX_STRING: &'static str = r"^[_a-zA-Z][_a-zA-Z_0-9]*$";
+
+ static ref VALID_TOKEN_NAME: Regex = Regex::new(&RAW_REGEX_STRING).unwrap();
+}
+
+// Verifies that [name] is a valid string name.
+pub(crate) fn validate_token_name(name: &str, verbose: bool) -> bool {
+ let is_match = VALID_TOKEN_NAME.is_match(name);
+ veprintln!(
+ verbose,
+ "validate_token_name: checking identifier: '{}'; is_match: {}",
+ name,
+ is_match
+ );
+ is_match
+}
+
/// The XML parser state.
#[derive(Debug)]
pub struct Instance {
@@ -210,6 +237,13 @@
if self.messages.contains_key(&token_name) {
return Err(anyhow!("duplicate string with name: {}", token_name));
}
+ if !validate_token_name(&token_name, self.verbose) {
+ return Err(anyhow!(
+ "name is not acceptable: '{}', does not match: {}",
+ &token_name,
+ *RAW_REGEX_STRING
+ ));
+ }
self.messages.insert(token_name, text);
self.state = State::Resources;
}
@@ -432,6 +466,19 @@
</resources>
"#,
},
+ TestCase {
+ name: "string_name has unexpected comma",
+ content: r#"
+ <!-- comment -->
+ <?xml version="1.0" encoding="utf-8"?>
+ <resources>
+ <!-- comment -->
+ <string
+ name="string_name,"
+ >text_string</string>
+ </resources>
+ "#,
+ },
];
for test in tests {
let input = EventReader::from_str(&test.content);
@@ -446,4 +493,23 @@
}
Ok(())
}
+
+ use super::validate_token_name;
+
+ #[test]
+ fn acceptable_and_unacceptable_names() -> Result<(), Error> {
+ assert_eq!(true, validate_token_name("_hello", true /*verbose*/));
+ assert_eq!(true, validate_token_name("__hello", true /*verbose*/));
+ assert_eq!(true, validate_token_name("__HELLO__", true /*verbose*/));
+ assert_eq!(true, validate_token_name("__H_e_l_l_o__", true /*verbose*/));
+ assert_eq!(true, validate_token_name("__H_e_l_l_o_1234__", true /*verbose*/));
+
+ // Starts with zero
+ assert_eq!(false, validate_token_name("0cool", true /*verbose*/));
+ // Unexpected comma.
+ assert_eq!(false, validate_token_name("role_table,", true /*verbose*/));
+ // Unicode overdose.
+ assert_eq!(false, validate_token_name("хелло_њорлд", true /*verbose*/));
+ Ok(())
+ }
}