diff --git a/host/include/libOpenglRender/IOStream.h b/host/include/libOpenglRender/IOStream.h
index 93954ab..9173d31 100644
--- a/host/include/libOpenglRender/IOStream.h
+++ b/host/include/libOpenglRender/IOStream.h
@@ -97,7 +97,7 @@
     // outside of GLESv2_enc will produce a link error. This is intentional
     // (technical debt).
     void readbackPixels(void* context, int width, int height, unsigned int format, unsigned int type, void* pixels);
-    void uploadPixels(void* context, int width, int height, unsigned int format, unsigned int type, const void* pixels);
+    void uploadPixels(void* context, int width, int height, int depth, unsigned int format, unsigned int type, const void* pixels);
 
 
 private:
diff --git a/shared/OpenglCodecCommon/GLClientState.cpp b/shared/OpenglCodecCommon/GLClientState.cpp
index 3f207b8..a31f697 100644
--- a/shared/OpenglCodecCommon/GLClientState.cpp
+++ b/shared/OpenglCodecCommon/GLClientState.cpp
@@ -852,7 +852,7 @@
     return 1;
 }
 
-void GLClientState::getPackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const
+void GLClientState::getPackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const
 {
     if (width <= 0 || height <= 0) {
         *startOffset = 0;
@@ -868,6 +868,7 @@
             m_pixelStore.pack_row_length,
             m_pixelStore.pack_skip_pixels,
             m_pixelStore.pack_skip_rows,
+            bpp,
             startOffset,
             pixelRowSize,
             totalRowSize);
@@ -875,7 +876,7 @@
     *skipRows = m_pixelStore.pack_skip_rows;
 }
 
-void GLClientState::getUnpackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const
+void GLClientState::getUnpackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const
 {
     if (width <= 0 || height <= 0) {
         *startOffset = 0;
@@ -891,6 +892,7 @@
             m_pixelStore.unpack_row_length,
             m_pixelStore.unpack_skip_pixels,
             m_pixelStore.unpack_skip_rows,
+            bpp,
             startOffset,
             pixelRowSize,
             totalRowSize);
@@ -898,6 +900,35 @@
     *skipRows = m_pixelStore.unpack_skip_rows;
 }
 
+// Reports the unpack-side byte layout of a width x height x depth client image, zeroing the sizes when any dimension is <= 0.
+// startOffset, pixelRowSize, totalRowSize, skipRows and skipImages must be non-null; the other outputs may be null.
+void GLClientState::getUnpackingOffsets3D(GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* pixelImageSize, int* totalImageSize, int* skipRows, int* skipImages) const
+{
+    // The skip counts are plain pixel-store state, so they are reported on every path.
+    *skipRows = m_pixelStore.unpack_skip_rows;
+    *skipImages = m_pixelStore.unpack_skip_images;
+    if (width <= 0 || height <= 0 || depth <= 0) {
+        // Empty image: zero every size so a caller never reads an output that was left unset.
+        if (bpp) *bpp = 0;
+        *startOffset = 0;
+        *pixelRowSize = 0;
+        *totalRowSize = 0;
+        if (pixelImageSize) *pixelImageSize = 0;
+        if (totalImageSize) *totalImageSize = 0;
+        return;
+    }
+
+    GLESTextureUtils::computePackingOffsets3D(
+            width, height, depth, format, type,
+            m_pixelStore.unpack_alignment,
+            m_pixelStore.unpack_row_length,
+            m_pixelStore.unpack_image_height,
+            m_pixelStore.unpack_skip_pixels,
+            m_pixelStore.unpack_skip_rows,
+            m_pixelStore.unpack_skip_images,
+            bpp, startOffset, pixelRowSize, totalRowSize, pixelImageSize, totalImageSize);
+}
+
 void GLClientState::setNumActiveUniformsInUniformBlock(GLuint program, GLuint uniformBlockIndex, GLint numActiveUniforms) {
     UniformBlockInfoKey key;
     key.program = program;
diff --git a/shared/OpenglCodecCommon/GLClientState.h b/shared/OpenglCodecCommon/GLClientState.h
index f9f8ee3..b7f5655 100644
--- a/shared/OpenglCodecCommon/GLClientState.h
+++ b/shared/OpenglCodecCommon/GLClientState.h
@@ -253,8 +253,9 @@
     size_t pixelDataSize(GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, int pack) const;
     size_t pboNeededDataSize(GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, int pack) const;
     size_t clearBufferNumElts(GLenum buffer) const;
-    void getPackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const;
-    void getUnpackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const;
+    void getPackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const;
+    void getUnpackingOffsets2D(GLsizei width, GLsizei height, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* skipRows) const;
+    void getUnpackingOffsets3D(GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, int* bpp, int* startOffset, int* pixelRowSize, int* totalRowSize, int* pixelImageSize, int* totalImageSize, int* skipRows, int* skipImages) const;
 
     void setCurrentProgram(GLint program) { m_currentProgram = program; }
     void setCurrentShaderProgram(GLint program) { m_currentShaderProgram = program; }
diff --git a/shared/OpenglCodecCommon/GLESTextureUtils.cpp b/shared/OpenglCodecCommon/GLESTextureUtils.cpp
index 297a862..cedcda8 100644
--- a/shared/OpenglCodecCommon/GLESTextureUtils.cpp
+++ b/shared/OpenglCodecCommon/GLESTextureUtils.cpp
@@ -207,7 +207,9 @@
     GLsizei alignedPitch = computePitch(width, format, type, align);
     int packingOffsetRows =
         (skipImages * height + skipRows);
-    return packingOffsetRows * alignedPitch + skipPixels * computePixelSize(format, type);
+    int res = packingOffsetRows * alignedPitch + skipPixels * computePixelSize(format, type);
+
+    return res;
 }
 
 void computeTextureStartEnd(
@@ -295,6 +297,7 @@
         int packRowLength,
         int packSkipPixels,
         int packSkipRows,
+        int* bpp,
         int* startOffset,
         int* packingPixelRowSize,
         int* packingTotalRowSize) {
@@ -307,10 +310,46 @@
         computePackingOffset(
                 format, type, widthTotal, height, packAlignment, packSkipPixels, packSkipRows, 0 /* skip images = 0 */);
 
+    if (bpp) *bpp = computePixelSize(format, type);
     if (startOffset) *startOffset = packingOffsetStart;
     if (packingPixelRowSize) *packingPixelRowSize = pixelsOnlyRowSize;
     if (packingTotalRowSize) *packingTotalRowSize = totalRowSize;
 }
 
+void computePackingOffsets3D(
+        GLsizei width, GLsizei height, GLsizei depth,
+        GLenum format, GLenum type,
+        int packAlignment,
+        int packRowLength,
+        int packImageHeight,
+        int packSkipPixels,
+        int packSkipRows,
+        int packSkipImages,
+        int* bpp,
+        int* startOffset,
+        int* packingPixelRowSize,
+        int* packingTotalRowSize,
+        int* packingPixelImageSize,
+        int* packingTotalImageSize) {
+    // A zero row length or image height selects the image's own width / height.
+    const int rowPixels = packRowLength ? packRowLength : width;
+    const int imageRows = packImageHeight ? packImageHeight : height;
+
+    // Row sizes: one for the effective row length, one for just |width| pixels.
+    const int fullRowBytes = computePitch(rowPixels, format, type, packAlignment);
+    const int dataRowBytes = computePitch(width, format, type, packAlignment);
+
+    // Byte offset implied by the skip-pixels / skip-rows / skip-images settings.
+    const int firstPixelOffset = computePackingOffset(
+            format, type, rowPixels, imageRows, packAlignment,
+            packSkipPixels, packSkipRows, packSkipImages);
+
+    if (bpp) *bpp = computePixelSize(format, type);
+    if (startOffset) *startOffset = firstPixelOffset;
+    if (packingPixelRowSize) *packingPixelRowSize = dataRowBytes;
+    if (packingTotalRowSize) *packingTotalRowSize = fullRowBytes;
+    if (packingPixelImageSize) *packingPixelImageSize = fullRowBytes * height;
+    if (packingTotalImageSize) *packingTotalImageSize = fullRowBytes * imageRows;
+}
 
 } // namespace GLESTextureUtils
diff --git a/shared/OpenglCodecCommon/GLESTextureUtils.h b/shared/OpenglCodecCommon/GLESTextureUtils.h
index f623d23..1d26b3a 100644
--- a/shared/OpenglCodecCommon/GLESTextureUtils.h
+++ b/shared/OpenglCodecCommon/GLESTextureUtils.h
@@ -49,9 +49,27 @@
         int packRowLength,
         int packSkipPixels,
         int packSkipRows,
+        int* bpp,
         int* startOffset,
         int* packingPixelRowSize,
         int* packingTotalRowSize);
 
+// Computes the byte layout for processing 3D textures exactly to the sizes of client buffers.
+void computePackingOffsets3D(
+        GLsizei width, GLsizei height, GLsizei depth,
+        GLenum format, GLenum type,
+        int packAlignment,
+        int packRowLength,      // 0 means "use width"
+        int packImageHeight,    // 0 means "use height"
+        int packSkipPixels,
+        int packSkipRows,
+        int packSkipImages,
+        int* bpp,                      // out, optional: computePixelSize(format, type)
+        int* startOffset,              // out, optional: byte offset produced by the skip settings
+        int* packingPixelRowSize,      // out, optional: row size computed from width
+        int* packingTotalRowSize,      // out, optional: row size computed from the effective row length
+        int* packingPixelImageSize,    // out, optional: total row size * height
+        int* packingTotalImageSize);   // out, optional: total row size * effective image height
+
 } // namespace GLESTextureUtils
 #endif
diff --git a/system/GLESv2_enc/GL2Encoder.cpp b/system/GLESv2_enc/GL2Encoder.cpp
index 3ad9420..1f0b7a0 100755
--- a/system/GLESv2_enc/GL2Encoder.cpp
+++ b/system/GLESv2_enc/GL2Encoder.cpp
@@ -1902,7 +1902,10 @@
                 break;
             }
         }
-        memcpy(source, returned.substr(0, bufsize - 1).c_str(), bufsize);
+        std::string ret = returned.substr(0, bufsize - 1);
+
+        size_t toCopy = bufsize < (ret.size() + 1) ? bufsize : ret.size() + 1;
+        memcpy(source, ret.c_str(), toCopy);
     }
 }
 
@@ -4123,6 +4126,7 @@
     assert(ctx->m_state != NULL);
     SET_ERROR_IF(!isValidDrawMode(mode), GL_INVALID_ENUM);
     SET_ERROR_IF(count < 0, GL_INVALID_VALUE);
+    SET_ERROR_IF(primcount < 0, GL_INVALID_VALUE);
 
     bool has_client_vertex_arrays = false;
     bool has_indirect_arrays = false;
@@ -4148,6 +4152,7 @@
     assert(ctx->m_state != NULL);
     SET_ERROR_IF(!isValidDrawMode(mode), GL_INVALID_ENUM);
     SET_ERROR_IF(count < 0, GL_INVALID_VALUE);
+    SET_ERROR_IF(primcount < 0, GL_INVALID_VALUE);
     SET_ERROR_IF(!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_SHORT || type == GL_UNSIGNED_INT), GL_INVALID_ENUM);
     SET_ERROR_IF(ctx->m_state->getTransformFeedbackActiveUnpaused(), GL_INVALID_OPERATION);
 
diff --git a/system/GLESv2_enc/IOStream2.cpp b/system/GLESv2_enc/IOStream2.cpp
index 1362d3e..4890852 100644
--- a/system/GLESv2_enc/IOStream2.cpp
+++ b/system/GLESv2_enc/IOStream2.cpp
@@ -12,12 +12,14 @@
     GL2Encoder *ctx = (GL2Encoder *)context;
     assert (ctx->state() != NULL);
 
+    int bpp = 0;
     int startOffset = 0;
     int pixelRowSize = 0;
     int totalRowSize = 0;
     int skipRows = 0;
 
     ctx->state()->getPackingOffsets2D(width, height, format, type,
+                                      &bpp,
                                       &startOffset,
                                       &pixelRowSize,
                                       &totalRowSize,
@@ -31,7 +33,7 @@
         pixelRowSize == totalRowSize) {
         // fast path
         readback(pixels, pixelDataSize);
-    } else if (pixelRowSize == totalRowSize) {
+    } else if (pixelRowSize == totalRowSize && (pixelRowSize == width * bpp)) {
         // fast path but with skip in the beginning
         std::vector<char> paddingToDiscard(startOffset, 0);
         readback(&paddingToDiscard[0], startOffset);
@@ -51,63 +53,173 @@
         char* start = (char*)pixels + startOffset;
 
         for (int i = 0; i < height; i++) {
-            readback(start, pixelRowSize);
-            totalReadback += pixelRowSize;
-            readback(&paddingToDiscard[0], paddingSize);
-            totalReadback += paddingSize;
-            start += totalRowSize;
+            if (pixelRowSize != width * bpp) {
+                size_t rowSlack = pixelRowSize - width * bpp;
+                std::vector<char> rowSlackToDiscard(rowSlack, 0);
+                readback(start, width * bpp);
+                readback(&rowSlackToDiscard[0], rowSlack);
+                totalReadback += pixelRowSize;
+                readback(&paddingToDiscard[0], paddingSize);
+                totalReadback += paddingSize;
+                start += totalRowSize;
+            } else {
+                readback(start, pixelRowSize);
+                totalReadback += pixelRowSize;
+                readback(&paddingToDiscard[0], paddingSize);
+                totalReadback += paddingSize;
+                start += totalRowSize;
+            }
         }
     }
 }
 
-void IOStream::uploadPixels(void* context, int width, int height, unsigned int format, unsigned int type, const void* pixels) {
+// Streams the client image at |pixels| through writeFully() using the unpack layout reported by the
+// encoder's client state. depth == 1 uses the 2D layout; any other depth uses the 3D layout.
+// Off the fast path, skipped and padding bytes are written as zeros rather than read from |pixels|.
+void IOStream::uploadPixels(void* context, int width, int height, int depth, unsigned int format, unsigned int type, const void* pixels) {
     GL2Encoder *ctx = (GL2Encoder *)context;
     assert (ctx->state() != NULL);
 
-    int startOffset = 0;
-    int pixelRowSize = 0;
-    int totalRowSize = 0;
-    int skipRows = 0;
+    if (1 == depth) {
+        int bpp = 0;
+        int startOffset = 0;
+        int pixelRowSize = 0;
+        int totalRowSize = 0;
+        int skipRows = 0;
 
-    ctx->state()->getUnpackingOffsets2D(width, height, format, type,
-                                        &startOffset,
-                                        &pixelRowSize,
-                                        &totalRowSize,
-                                        &skipRows);
+        ctx->state()->getUnpackingOffsets2D(width, height, format, type,
+                &bpp,
+                &startOffset,
+                &pixelRowSize,
+                &totalRowSize,
+                &skipRows);
 
-    size_t pixelDataSize =
-        ctx->state()->pixelDataSize(
-            width, height, 1, format, type, 0 /* is unpack */);
+        size_t pixelDataSize =
+            ctx->state()->pixelDataSize(
+                    width, height, 1, format, type, 0 /* is unpack */);
 
-    if (startOffset == 0 &&
-        pixelRowSize == totalRowSize) {
-        // fast path
-        writeFully(pixels, pixelDataSize);
-    } else if (pixelRowSize == totalRowSize) {
-        // fast path but with skip in the beginning
-        std::vector<char> paddingToDiscard(startOffset, 0);
-        writeFully(&paddingToDiscard[0], startOffset);
-        writeFully((char*)pixels + startOffset, pixelDataSize - startOffset);
-    } else {
-        int totalReadback = 0;
-
-        if (startOffset > 0) {
+        if (startOffset == 0 &&
+                pixelRowSize == totalRowSize) {
+            // fast path
+            writeFully(pixels, pixelDataSize);
+        } else if (pixelRowSize == totalRowSize && (pixelRowSize == width * bpp)) {
+            // fast path but with skip in the beginning
             std::vector<char> paddingToDiscard(startOffset, 0);
             writeFully(&paddingToDiscard[0], startOffset);
-            totalReadback += startOffset;
+            writeFully((char*)pixels + startOffset, pixelDataSize - startOffset);
+        } else {
+            int totalReadback = 0;
+
+            if (startOffset > 0) {
+                std::vector<char> paddingToDiscard(startOffset, 0);
+                writeFully(paddingToDiscard.data(), startOffset);
+                totalReadback += startOffset;
+            }
+            // need to upload row by row
+            size_t paddingSize = totalRowSize - pixelRowSize;
+            std::vector<char> paddingToDiscard(paddingSize, 0);
+
+            char* start = (char*)pixels + startOffset;
+            // Built once for all rows; either filler may be empty, so use data() rather than &v[0].
+            size_t rowSlack = pixelRowSize - width * bpp;
+            std::vector<char> rowSlackToDiscard(rowSlack, 0);
+            for (int i = 0; i < height; i++) {
+                if (pixelRowSize != width * bpp) {
+                    writeFully(start, width * bpp);
+                    writeFully(rowSlackToDiscard.data(), rowSlack);
+                    totalReadback += pixelRowSize;
+                    writeFully(paddingToDiscard.data(), paddingSize);  // paddingSize may be 0 here
+                    totalReadback += paddingSize;
+                    start += totalRowSize;
+                } else {
+                    writeFully(start, pixelRowSize);
+                    totalReadback += pixelRowSize;
+                    writeFully(paddingToDiscard.data(), paddingSize);
+                    totalReadback += paddingSize;
+                    start += totalRowSize;
+                }
+            }
         }
-        // need to upload row by row
-        size_t paddingSize = totalRowSize - pixelRowSize;
-        std::vector<char> paddingToDiscard(paddingSize, 0);
+    } else {
+        int bpp = 0;
+        int startOffset = 0;
+        int pixelRowSize = 0;
+        int totalRowSize = 0;
+        int pixelImageSize = 0;
+        int totalImageSize = 0;
+        int skipRows = 0;
+        int skipImages = 0;
 
-        char* start = (char*)pixels + startOffset;
+        ctx->state()->getUnpackingOffsets3D(width, height, depth, format, type,
+                &bpp,
+                &startOffset,
+                &pixelRowSize,
+                &totalRowSize,
+                &pixelImageSize,
+                &totalImageSize,
+                &skipRows,
+                &skipImages);
 
-        for (int i = 0; i < height; i++) {
-            writeFully(start, pixelRowSize);
-            totalReadback += pixelRowSize;
-            writeFully(&paddingToDiscard[0], paddingSize);
-            totalReadback += paddingSize;
-            start += totalRowSize;
+        size_t pixelDataSize =
+            ctx->state()->pixelDataSize(
+                    width, height, depth, format, type, 0 /* is unpack */);
+
+        // Same three strategies as the 2D branch, plus the gap between consecutive images.
+        if (startOffset == 0 &&
+            pixelRowSize == totalRowSize &&
+            pixelImageSize == totalImageSize) {
+            // fast path
+            writeFully(pixels, pixelDataSize);
+        } else if (pixelRowSize == totalRowSize &&
+                   pixelImageSize == totalImageSize &&
+                   pixelRowSize == (width * bpp)) {
+            // fast path but with skip in the beginning
+            std::vector<char> paddingToDiscard(startOffset, 0);
+            writeFully(paddingToDiscard.data(), startOffset);
+            writeFully((char*)pixels + startOffset, pixelDataSize - startOffset);
+        } else {
+            int totalReadback = 0;
+
+            if (startOffset > 0) {
+                std::vector<char> paddingToDiscard(startOffset, 0);
+                writeFully(paddingToDiscard.data(), startOffset);
+                totalReadback += startOffset;
+            }
+            // need to upload row by row
+            size_t paddingSize = totalRowSize - pixelRowSize;
+            std::vector<char> paddingToDiscard(paddingSize, 0);
+
+            char* start = (char*)pixels + startOffset;
+
+            size_t rowSlack = pixelRowSize - width * bpp;
+            std::vector<char> rowSlackToDiscard(rowSlack, 0);
+
+            size_t imageSlack = totalImageSize - pixelImageSize;
+            std::vector<char> imageSlackToDiscard(imageSlack, 0);
+
+            for (int k = 0; k < depth; ++k) {
+                for (int i = 0; i < height; i++) {
+                    if (pixelRowSize != width * bpp) {
+                        writeFully(start, width * bpp);
+                        writeFully(rowSlackToDiscard.data(), rowSlack);
+                        totalReadback += pixelRowSize;
+                        writeFully(paddingToDiscard.data(), paddingSize);  // paddingSize may be 0 here
+                        totalReadback += paddingSize;
+                        start += totalRowSize;
+                    } else {
+                        writeFully(start, pixelRowSize);
+                        totalReadback += pixelRowSize;
+                        writeFully(paddingToDiscard.data(), paddingSize);  // 0 when only the image height differs
+                        totalReadback += paddingSize;
+                        start += totalRowSize;
+                    }
+                }
+                if (imageSlack > 0) {
+                    writeFully(imageSlackToDiscard.data(), imageSlack);
+                    start += imageSlack;
+                    totalReadback += imageSlack;
+                }
+            }
         }
     }
 }
diff --git a/system/GLESv2_enc/gl2_enc.cpp b/system/GLESv2_enc/gl2_enc.cpp
index fb70443..814021b 100644
--- a/system/GLESv2_enc/gl2_enc.cpp
+++ b/system/GLESv2_enc/gl2_enc.cpp
@@ -3344,7 +3344,7 @@
 	stream->writeFully(&__size_pixels,4);
 	if (useChecksum) checksumCalculator->addBuffer(&__size_pixels,4);
 	if (pixels != NULL) {
-	    stream->uploadPixels(self, width, height, format, type, pixels);
+		 stream->uploadPixels(self, width, height, 1, format, type, pixels);
 		if (useChecksum) checksumCalculator->addBuffer(pixels, __size_pixels);
 	}
 	buf = stream->alloc(checksumSize);
@@ -3497,7 +3497,7 @@
 	stream->writeFully(&__size_pixels,4);
 	if (useChecksum) checksumCalculator->addBuffer(&__size_pixels,4);
 	if (pixels != NULL) {
-	    stream->uploadPixels(self, width, height, format, type, pixels);
+		 stream->uploadPixels(self, width, height, 1, format, type, pixels);
 		if (useChecksum) checksumCalculator->addBuffer(pixels, __size_pixels);
 	}
 	buf = stream->alloc(checksumSize);
@@ -8489,7 +8489,7 @@
 	stream->writeFully(&__size_data,4);
 	if (useChecksum) checksumCalculator->addBuffer(&__size_data,4);
 	if (data != NULL) {
-		stream->writeFully(data, __size_data);
+		 stream->uploadPixels(self, width, height, depth, format, type, data);
 		if (useChecksum) checksumCalculator->addBuffer(data, __size_data);
 	}
 	buf = stream->alloc(checksumSize);
@@ -8596,7 +8596,7 @@
 	stream->writeFully(&__size_data,4);
 	if (useChecksum) checksumCalculator->addBuffer(&__size_data,4);
 	if (data != NULL) {
-		stream->writeFully(data, __size_data);
+		 stream->uploadPixels(self, width, height, depth, format, type, data);
 		if (useChecksum) checksumCalculator->addBuffer(data, __size_data);
 	}
 	buf = stream->alloc(checksumSize);
