Depend on the packed_simd crate for SIMD support (replacing core::simd)
diff --git a/Cargo.toml b/Cargo.toml
index d71cb36..a62c636 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,7 +23,7 @@
 std = ["rand_core/std", "alloc", "libc", "winapi", "cloudabi", "fuchsia-zircon"]
 alloc = ["rand_core/alloc"]  # enables Vec and Box support (without std)
 i128_support = [] # enables i128 and u128 support
-simd_support = [] # enables SIMD support
+simd_support = ["packed_simd"] # enables SIMD support
 serde1 = ["rand_core/serde1", "rand_isaac/serde1", "rand_xorshift/serde1"] # enables serialization for PRNGs
 
 [workspace]
@@ -35,6 +35,7 @@
 rand_isaac = { path = "rand_isaac", version = "0.1" }
 rand_xorshift = { path = "rand_xorshift", version = "0.1" }
 log = { version = "0.4", optional = true }
+packed_simd = { version = "0.1", optional = true, features = ["into_bits"] }
 
 [target.'cfg(unix)'.dependencies]
 libc = { version = "0.2", optional = true }
diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index 0d418eb..d7a5bb9 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -15,7 +15,7 @@
 use distributions::{Distribution, Standard};
 use distributions::utils::FloatSIMDUtils;
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 /// A distribution to sample floating point numbers uniformly in the half-open
 /// interval `(0, 1]`, i.e. including 1 but not 0.
@@ -106,7 +106,7 @@
                 // Multiply-based method; 24/53 random bits; [0, 1) interval.
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
                 let precision = $fraction_bits + 1;
                 let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
@@ -121,7 +121,7 @@
                 // Multiply-based method; 24/53 random bits; (0, 1] interval.
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
                 let precision = $fraction_bits + 1;
                 let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
@@ -138,7 +138,7 @@
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
                 use core::$f_scalar::EPSILON;
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
 
                 let value: $uty = rng.gen();
                 let fraction = value >> (float_size - $fraction_bits);
@@ -174,7 +174,7 @@
     use distributions::{Open01, OpenClosed01};
     use rngs::mock::StepRng;
     #[cfg(feature="simd_support")]
-    use core::simd::*;
+    use packed_simd::*;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
     const EPSILON64: f64 = ::core::f64::EPSILON;
diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs
index 82efd9b..0fce20f 100644
--- a/src/distributions/integer.rs
+++ b/src/distributions/integer.rs
@@ -13,7 +13,7 @@
 use {Rng};
 use distributions::{Distribution, Standard};
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 impl Distribution<u8> for Standard {
     #[inline]
diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 0e5a4f9..a1358df 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -124,7 +124,7 @@
 
 
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 /// Sample values uniformly between two bounds.
 ///
@@ -571,7 +571,7 @@
 
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u8)
+                let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard)
                                .into_float_with_exponent(0);
 
                 // Get a value in the range [0, 1) in order to avoid
@@ -600,7 +600,7 @@
 
                 loop {
                     // Generate a value in the range [1, 2)
-                    let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u32)
+                    let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard)
                                    .into_float_with_exponent(0);
 
                     // Get a value in the range [0, 1) in order to avoid
@@ -785,7 +785,7 @@
     use rngs::mock::StepRng;
     use distributions::uniform::Uniform;
     use distributions::utils::FloatAsSIMD;
-    #[cfg(feature="simd_support")] use core::simd::*;
+    #[cfg(feature="simd_support")] use packed_simd::*;
 
     #[should_panic]
     #[test]
diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs
index 8ac2c66..a21c7cf 100644
--- a/src/distributions/utils.rs
+++ b/src/distributions/utils.rs
@@ -11,7 +11,7 @@
 //! Math helper functions
 
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 #[cfg(feature="std")]
 use distributions::ziggurat_tables;
 #[cfg(feature="std")]
@@ -263,7 +263,7 @@
                 <$ty>::from_bits(<$uty>::from_bits(self) + <$uty>::from_bits(mask))
             }
             type UInt = $uty;
-            fn cast_from_int(i: Self::UInt) -> Self { $ty::from(i) }
+            fn cast_from_int(i: Self::UInt) -> Self { i.cast() }
         }
     }
 }
@@ -271,10 +271,10 @@
 #[cfg(feature="simd_support")] simd_impl! { f32x2, f32, m32x2, u32x2 }
 #[cfg(feature="simd_support")] simd_impl! { f32x4, f32, m32x4, u32x4 }
 #[cfg(feature="simd_support")] simd_impl! { f32x8, f32, m32x8, u32x8 }
-#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m1x16, u32x16 }
+#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m32x16, u32x16 }
 #[cfg(feature="simd_support")] simd_impl! { f64x2, f64, m64x2, u64x2 }
 #[cfg(feature="simd_support")] simd_impl! { f64x4, f64, m64x4, u64x4 }
-#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m1x8, u64x8 }
+#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m64x8, u64x8 }
 
 /// Calculates ln(gamma(x)) (natural logarithm of the gamma
 /// function) using the Lanczos approximation.
diff --git a/src/lib.rs b/src/lib.rs
index 7087015..f07a68c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -236,6 +236,8 @@
 #[cfg(feature = "std")] extern crate core;
 #[cfg(all(feature = "alloc", not(feature="std")))] extern crate alloc;
 
+#[cfg(feature="simd_support")] extern crate packed_simd;
+
 #[cfg(all(target_arch="wasm32", not(target_os="emscripten"), feature="stdweb"))]
 #[macro_use]
 extern crate stdweb;