[quickjs] Fix UBSan bugs There were 3 UB issues (and one unique issue) when attempting to build quickjs with UBSan: 1. A left shift into the sign bit of an int. 2. Passing a null pointer into the second argument of memcpy(), which is marked with the nonnull attribute. 3. Signed integer overflow when converting a double to an int32_t. 4. (Unique) qjs would throw a stack overflow exception if JS_CallInternal was instrumented with UBSan (adding __attribute__((no_sanitize("undefined"))) would silence it). Solutions: 1. This only happens in a macro that left-shifts an enum value 24 places. Casting this to an unsigned value and then performing the bitwise OR will circumvent the UB. We can then do a two's complement operation (via a macro I added) to effectively cast back to an int type. The macro itself just does some arithmetic to get the two's complement. We can't use type punning (via unions) to cast the unsigned to a signed since that makes assumptions about the underlying representation of a signed int. (To make it simpler, we can technically assume we're using a two's complement representation since Clang uses that representation for signed ints). I am able to assert that using this macro still results in the same codegen (with optimizations) as the previous code, but without UB. 2. (May not be the best solution, but the code still seems to work.) Just check if the second operand is a nullptr before calling memcpy(). 3. Instead, implicitly convert the double to a uint32_t (which can hold the range of values this double represents), then use the two's complement macro in (1) to get an int32_t. I'm also able to assert the codegen is the same with optimizations. 4. I was able to track down what triggers this exception to the dispatch table in JS_CallInternal.
I don't exactly understand how, but somehow, instrumenting this particular table with UBSan causes either the JS runtime stack_top pointer or js_get_stack_pointer() to yield the wrong values, hitting the check that triggers the exception. As a workaround, we can instead just use a normal switch by setting DIRECT_DISPATCH to 0. This simply opts for a switch-case over the table, which the compiler can choose to lower to a table if that is more fitting. This doesn't change any functionality since it effectively does the same task. This will require updating the flower afterwards. Bug: 47041 Change-Id: Ifc9fc921de5542bec6516e14f3d6038409d2745f

commit: d86b5af0ab340250b2a68ab75e13ea214f9b80c5 [log] [tgz]
author: Leonard Chan <leonardchan@google.com> Wed Dec 09 11:40:05 2020 -0800
committer: Leonard Chan <leonardchan@google.com> Thu Dec 10 21:13:47 2020 +0000
tree: 322d85f295c5c2280a39e6e4d9b98d046169b2b2
parent: 69b041bc41fc8155eba9e6c9e86315b9c06e54c3 [diff]
diff --git a/BUILD.gn b/BUILD.gn
index 3bfcbc4..0332605 100644
--- a/BUILD.gn
+++ b/BUILD.gn

@@ -14,6 +14,12 @@
     "_GNU_SOURCE",
     "CONFIG_VERSION=\"" + version_lines[0] + "\"",
     "CONFIG_BIGNUM",
+
+    # Use a switch rather than a manually created dispatch table. Using the
+    # host-asan_ubsan variant causes the interpreter to crash with an internal
+    # stack overflow error if this table is used. The compiler should already
+    # decide which is the best way to lower a switch statement anyway.
+    "DIRECT_DISPATCH=0",
   ]
 
   # Suppress warnings in upstream code that are triggered by Fuchsia compilation flags.
@@ -42,10 +48,6 @@
     deps = [ "//sdk/lib/fdio" ]
   }
 
-  # TODO(47041): UBSan has found an instance of undefined behavior in this target.
-  # Disable UBSan for this target temporarily until it is migrated into CI/CQ.
-  configs += [ "//build/config:temporarily_disable_ubsan_do_not_use" ]
-
   public_configs = [ ":qjs-config" ]
 }
 

diff --git a/cutils.c b/cutils.c
index a02fb76..39f4786 100644
--- a/cutils.c
+++ b/cutils.c

@@ -140,6 +140,14 @@
         if (dbuf_realloc(s, s->size + len))
             return -1;
     }
+
+    // FIXME: It would be preferable to instead apply fixes to the source of
+    // whatever passes a null pointer or zero length, but there are many
+    // instances where this function is used, and different call paths can
+    // lead to this.
+    if (!data || !len)
+      return 0;
+
     memcpy(s->buf + s->size, data, len);
     s->size += len;
     return 0;

diff --git a/quickjs.c b/quickjs.c
index 66e11fe..3ea7f46 100644
--- a/quickjs.c
+++ b/quickjs.c

@@ -48,11 +48,14 @@
 
 #define OPTIMIZE         1
 #define SHORT_OPCODES    1
+
+#if !defined(DIRECT_DISPATCH)
 #if defined(EMSCRIPTEN)
 #define DIRECT_DISPATCH  0
 #else
 #define DIRECT_DISPATCH  1
 #endif
+#endif
 
 #if defined(__APPLE__)
 #define MALLOC_OVERHEAD  0
@@ -30222,9 +30225,9 @@
     JSAtom atom;
 } CodeContext;
 
-#define M2(op1, op2)            ((op1) | ((op2) << 8))
-#define M3(op1, op2, op3)       ((op1) | ((op2) << 8) | ((op3) << 16))
-#define M4(op1, op2, op3, op4)  ((op1) | ((op2) << 8) | ((op3) << 16) | ((op4) << 24))
+#define M2(op1, op2)            (TWOS_COMPLEMENT_UINT32_TO_INT32(((unsigned int)op1) | (((unsigned int)op2) << 8)))
+#define M3(op1, op2, op3)       (TWOS_COMPLEMENT_UINT32_TO_INT32(((unsigned int)op1) | (((unsigned int)op2) << 8) | (((unsigned int)op3) << 16)))
+#define M4(op1, op2, op3, op4)  (TWOS_COMPLEMENT_UINT32_TO_INT32(((unsigned int)op1) | (((unsigned int)op2) << 8) | (((unsigned int)op3) << 16) | (((unsigned int)op4) << 24)))
 
 static BOOL code_match(CodeContext *s, int pos, ...)
 {
@@ -32314,8 +32317,10 @@
             }
         } else {
             b->vardefs = (void *)((uint8_t*)b + vardefs_offset);
-            memcpy(b->vardefs, fd->args, fd->arg_count * sizeof(fd->args[0]));
-            memcpy(b->vardefs + fd->arg_count, fd->vars, fd->var_count * sizeof(fd->vars[0]));
+            if (fd->args)
+              memcpy(b->vardefs, fd->args, fd->arg_count * sizeof(fd->args[0]));
+            if (fd->vars)
+              memcpy(b->vardefs + fd->arg_count, fd->vars, fd->var_count * sizeof(fd->vars[0]));
         }
         b->var_count = fd->var_count;
         b->arg_count = fd->arg_count;

diff --git a/quickjs.h b/quickjs.h
index bb84829..b8078bb 100644
--- a/quickjs.h
+++ b/quickjs.h

@@ -538,17 +538,36 @@
 JSValue JS_NewBigInt64(JSContext *ctx, int64_t v);
 JSValue JS_NewBigUint64(JSContext *ctx, uint64_t v);
 
+// This is a portable, compiler-independent way of getting a two's complement
+// representation of a signed 32-bit int from an unsigned 32-bit int without
+// assuming the underlying representation of a signed int.
+//
+// Clang effectively optimizes this calculation away into a no-op.
+#define TWOS_COMPLEMENT_UINT32_TO_INT32(x) \
+    (((x) <= INT32_MAX) ? (int32_t)(x) : ((int32_t)((x) - (uint32_t)(INT32_MIN)) + INT32_MIN))
+
 static js_force_inline JSValue JS_NewFloat64(JSContext *ctx, double d)
 {
     JSValue v;
-    int32_t val;
     union {
         double d;
         uint64_t u;
     } u, t;
     u.d = d;
-    val = (int32_t)d;
+
+    // The integral value for `d` contains a two's complement representation of
+    // a 32-bit signed int. We cannot directly convert to an int32_t because
+    // it's possible for the floating point representation to be a value very
+    // close to INT32_MAX, but due to the nature of floating points, can be
+    // larger than INT32_MAX by a very small value.
+    //
+    // What we can do is store this value into a uint32_t which can hold the
+    // value, and do some arithmetic to get the signed two's complement. This
+    // achieves the same assembly as casting to an int32_t with optimizations.
+    uint32_t integral_val = d;
+    int32_t val = TWOS_COMPLEMENT_UINT32_TO_INT32(integral_val);
     t.d = val;
+
     /* -0 cannot be represented as integer, so we compare the bit
         representation */
     if (u.u == t.u) {
commit	d86b5af0ab340250b2a68ab75e13ea214f9b80c5	[log] [tgz]
author	Leonard Chan <leonardchan@google.com>	Wed Dec 09 11:40:05 2020 -0800
committer	Leonard Chan <leonardchan@google.com>	Thu Dec 10 21:13:47 2020 +0000
tree	322d85f295c5c2280a39e6e4d9b98d046169b2b2
parent	69b041bc41fc8155eba9e6c9e86315b9c06e54c3 [diff]