Support for simplified escape sequences in strings

Change-Id: I2ec28d254934170cf6867b04e776a5cb1f4b2cff
Reviewed-on: https://fuchsia-review.googlesource.com/c/intellij-language-fidl/+/774602
Reviewed-by: Alex Zaslavsky <azaslavsky@google.com>
diff --git a/resources/META-INF/plugin.xml b/resources/META-INF/plugin.xml
index d79919d..fe3f05a 100644
--- a/resources/META-INF/plugin.xml
+++ b/resources/META-INF/plugin.xml
@@ -21,6 +21,7 @@
         <li><b>0.19</b>
           <ul>
             <li>Remove support for deprecated struct defaults.</li>
+            <li>Support simplified escape sequences in strings.</li>
           </ul>
         </li>
         <li><b>0.18</b>
diff --git a/src/fuchsia/developer/plugin/fidl/ContextAwareHighlighter.java b/src/fuchsia/developer/plugin/fidl/ContextAwareHighlighter.java
index 87f56dd..fe11105 100644
--- a/src/fuchsia/developer/plugin/fidl/ContextAwareHighlighter.java
+++ b/src/fuchsia/developer/plugin/fidl/ContextAwareHighlighter.java
@@ -167,33 +167,34 @@
    */
   private static int isEscape(char[] chars, int offset) {
     // Is it a single-character escape sequence?
-    if ("abfnrtv\\\"".indexOf(chars[offset]) >= 0) return 1;
-
-    // Is it a hex or unicode escape sequence?
-    int numHexDigits = 0;
-    if (chars[offset] == 'x') {
-      numHexDigits = 2;
-    } else if (chars[offset] == 'u') {
-      numHexDigits = 4;
-    } else if (chars[offset] == 'U') {
-      numHexDigits = 8;
+    if ("nrt\\\"".indexOf(chars[offset]) >= 0) {
+      return 1;
     }
-    if (numHexDigits != 0) {
-      if (offset + numHexDigits > chars.length - 1) return -2;
-      for (int i = offset + 1; i <= offset + numHexDigits; i++) {
-        if (Character.digit(chars[i], 16) == -1) {
-          return -2;
-        }
+
+    // Is it a unicode escape sequence?
+    if (chars[offset] == 'u') {
+      if (offset + 2 >= chars.length - 1 || chars[offset + 1] != '{') {
+        return -1;
       }
-      return numHexDigits + 1;
-    }
 
-    // Is it an octal escape sequence? (This is placed last because octals are very rare.)
-    if (offset + 2 < chars.length) {
-      String octals = "01234567";
-      if (octals.indexOf(chars[offset]) >= 0
-          && octals.indexOf(chars[offset + 1]) >= 0
-          && octals.indexOf(chars[offset + 2]) >= 0) return 3;
+      int i = offset + 2;
+      StringBuilder number = new StringBuilder();
+      while (Character.digit(chars[i], 16) != -1) {
+        if (i - offset == 7) {
+          // too long!
+          return -1;
+        }
+        number.append(chars[i]);
+        i++;
+      }
+      if (chars[i] != '}' || number.length() == 0) {
+        return -1;
+      }
+      int value = Integer.parseInt(number.toString(), 16);
+      if (value < 0 || value >= 0x10fff) {
+        return -1; // illegal unicode value
+      }
+      return i - offset;
     }
 
     // Unknown escape sequence
@@ -201,7 +202,7 @@
   }
 
   /**
-   * Checks if the node in question, and returns its type, or null if its not a layout.
+   * Checks the node in question, and returns its type, or null if it's not a layout.
    *
    * @param element PsiElement of the node in question.
    * @return The IElementType describing this layout (or null if this is not a layout).
diff --git a/test/bad.test.fidl b/test/bad.test.fidl
index 2083eed..b3b4acf 100644
--- a/test/bad.test.fidl
+++ b/test/bad.test.fidl
@@ -4,8 +4,12 @@
 const string bad_single_char_escape = "\q";
 const string bad_octal = "octal \01";
 const string bad_hex = "\x1 hex";
-const string bad_unicode_32 = "\u1234\u12";
-const string bad_unicode_64 = "\U1234\U12345678";
+const string octal = "\017 octal";
+const string hex = "\x12";
+const string unicode_wrong_braces = "\u{}\u{\u1234";
+const string slash_u_terminated = "\u";
+const string too_much_unicode = "\u{cafe111}";
+const string unicode_64 = "unicode code point \u{12abcd34}";
 
 // Second `flexible` should be flagged.
 type FlexibleCantRepeat = flexible flexible enum {
diff --git a/test/good.test.fidl b/test/good.test.fidl
index e4f548e..37d7d18 100644
--- a/test/good.test.fidl
+++ b/test/good.test.fidl
@@ -8,10 +8,8 @@
 const ascii string = "this is ascii";
 
 const string single_char_escape = "escaping \t";
-const string octal = "\017 octal";
-const string hex = "\x12";
-const string unicode_32 = "\u12a4 unicode code point";
-const string unicode_64 = "unicode code point \U12abcd34";
+const string unicode_32 = "\u{12a4} unicode code point";
+const string more_unicode_32 = "\u{1234}\u{12}";
 
 alias Foo = foo.bar.baz;