[libpng10] Imported from libpng-1.0.27rc1.tar
diff --git a/ANNOUNCE b/ANNOUNCE
index 6c50615..ffa2f9a 100644
--- a/ANNOUNCE
+++ b/ANNOUNCE
@@ -1,47 +1,181 @@
 
-Libpng 1.0.26 - May 15, 2007
+Libpng 1.0.27rc1 - July 31, 2007
 
-This is a public release of libpng, intended for use in production codes.
+This is not intended to be a public release.  It will be replaced
+within a few weeks by a public version or by another test version.
 
 Files available for download:
 
 Source files with LF line endings (for Unix/Linux) and with a
 "configure" script
 
-   libpng-1.0.26.tar.gz
-   libpng-1.0.26.tar.bz2
+   libpng-1.0.27rc1.tar.gz
+   libpng-1.0.27rc1.tar.bz2
 
 Source files with LF line endings (for Unix/Linux) without the
 "configure" script
 
-   libpng-1.0.26-no-config.tar.gz
-   libpng-1.0.26-no-config.tar.bz2
+   libpng-1.0.27rc1-no-config.tar.gz
+   libpng-1.0.27rc1-no-config.tar.bz2
 
 Source files with CRLF line endings (for Windows), without the
 "configure" script
 
-   lpng1026.zip
-   lpng1026.tar.bz2
+   lp1027r01.zip
+   lp1027r01.tar.bz2
 
 Project files
 
-   libpng-1.0.26-project-netware.zip
-   libpng-1.0.26-project-wince.zip
+   libpng-1.0.27rc1-project-netware.zip
+   libpng-1.0.27rc1-project-wince.zip
 
 Other information:
 
-   libpng-1.0.26-README.txt
-   libpng-1.0.26-KNOWNBUGS.txt
-   libpng-1.0.26-LICENSE.txt
-   libpng-1.0.26-Y2K-compliance.txt
+   libpng-1.0.27rc1-README.txt
+   libpng-1.0.27rc1-KNOWNBUGS.txt
+   libpng-1.0.27rc1-LICENSE.txt
+   libpng-1.0.27rc1-Y2K-compliance.txt
 
-Changes since the last public release (1.0.25):
+Changes since the last public release (1.0.26):
 
-version 1.0.26 [May 15, 2007]
+version 1.2.19beta1 [May 18, 2007]
+  Changed "const static" to "static PNG_CONST" everywhere, mostly undoing
+    change of libpng-1.2.17beta2.  Changed other "const" to "PNG_CONST"
 
-  Reverted the recent change to symbol-handling in configure script
+version 1.2.19beta2 [May 18, 2007]
+  Fixed png_do_expand() to only use the appropriate bits of tRNS value.
+
+version 1.2.19beta3 [May 20, 2007]
+  Add some "png_byte" typecasts in png_check_keyword().
+  Write new_key instead of key in zTXt chunk.
+
+version 1.2.19beta4 [May 21, 2007]
+  Add png_snprintf() function and use it in place of sprintf() for improved
+    defense against buffer overflows.
+
+version 1.2.19beta5 [May 21, 2007]
+  Fixed png_handle_tRNS() to only use the appropriate bits of tRNS value.
+  Changed handling of more unused parameters, to avoid compiler warnings.
+  Removed some PNG_CONST in pngwutil.c to avoid compiler warnings.
+
+version 1.2.19beta6 [May 22, 2007]
+  Added some #ifdef PNG_MMX_CODE_SUPPORTED where needed in pngvcrd.c
+  Added a special "_MSC_VER" case that defines png_snprintf to _snprintf
+
+version 1.2.19beta7 [May 22, 2007]
+  Squelched png_squelch_warnings() in pnggccrd.c and added an
+    #ifdef PNG_MMX_CODE_SUPPORTED block around the declarations that caused
+    the warnings that png_squelch_warnings was squelching.
+
+version 1.2.19beta8 [May 22, 2007]
+  Removed __MMX__ from test in pngconf.h.
+
+version 1.2.19beta9 [May 23, 2007]
+  Made png_squelch_warnings() available via PNG_SQUELCH_WARNINGS macro.
+  Revised png_squelch_warnings() so it might work.
+  Updated makefile.sgcc and makefile.solaris; added makefile.solaris-x86.
+
+version 1.2.19beta10 [May 24, 2007]
+  Resquelched png_squelch_warnings(), use "__attribute__((used))" instead.
+
+version 1.2.19beta11 [May 28, 2007]
+  Return 0 from png_get_sPLT() and png_get_unknown_chunks() if png_ptr is NULL;
+    changed three remaining instances of png_strcpy() to png_strncpy() (David
+    Hill).
+  Make test for NULL row_buf at the beginning of png_do_read_transformations
+    unconditional.
+
+version 1.2.19beta12 [May 28, 2007]
+  Revised pnggccrd.c.
+
+version 1.2.19beta13 [June 14, 2007]
+  Prefer PNG_USE_PNGVCRD when _MSC_VER is defined in pngconf.h
+
+version 1.2.19beta14 [June 16, 2007]
+  Fix bug with handling of 16-bit transparency, introduced in 1.2.19beta2
+
+version 1.2.19beta15 [June 17, 2007]
+  Revised pnggccrd.c.
+
+version 1.2.19beta16 [June 18, 2007]
+  Revised pnggccrd.c again.
+  Updated contrib/gregbook.
+  Changed '#include "pnggccrd.c"' to '#include "$srcdir/pnggccrd.c"'
+    in configure.ac
+
+version 1.2.19beta17 [June 19, 2007]
+  Revised many of the makefiles, to set -DPNG_NO_MMX_CODE where needed
+    and to not use -O3 unless -DPNG_NO_MMX_CODE is also set.
+
+version 1.2.19beta18 [June 23, 2007]
+  Replaced some C++ style comments with C style comments in pnggccrd.c.
+  Copied optimized C code from pnggccrd.c to pngrutil.c, removed dependency
+    on pnggccrd.o from many makefiles.
+  Added sl and dylib to list of extensions to be installed by Makefile.am
+
+version 1.2.19beta19 [June 28, 2007]
+  Fixed testing PNG_RGB_TO_GRAY_ERR & PNG_RGB_TO_GRAY_WARN in pngrtran.c
+  More cleanup of pnggccrd.c and pngvcrd.c
+
+version 1.2.19beta20 [June 29, 2007]
+  Rebuilt Makefile.in and configure using libtool-1.5.24.
+  Fixed typo in pnggccrd.c
+
+version 1.2.19beta21 [June 30, 2007]
+  More revision of pnggccrd.c
+  Added "test" target to Makefile.in and Makefile.am
+
+version 1.2.19beta22 [July 3, 2007]
+  Added info about pngrutil/pnggccrd/pngvcrd to png_get_header_version()
+  Fix type definition of dummy_value_a, b in pnggccrd.c
+
+version 1.2.19beta23 [July 10, 2007]
+  Revert change to type definition of dummy_value_a, b in pnggccrd.c
+  Make sure __PIC__ is defined in pnggccrd.c when PIC is defined.
+  Make PNG_NO_MMX_CODE the default on x86_64 GNUC platforms.
+
+version 1.2.19beta24 [July 14, 2007]
+  Added PNG_NO_READ_FILTER, PNG_NO_WRITE_FILTER, PNG_NO_WARNING macros.
+  Added contrib/pngminim to demonstrate building minimal encoder and decoder
+
+version 1.2.19beta25 [July 15, 2007]
+  Removed the new PNG_NO_READ_FILTER macro since it would make the library
+    unable to read valid PNG files, and filtering is at the heart of the
+    PNG format.
+
+version 1.2.19beta26 [July 16, 2007]
+  Changed "png_free(str)" to "png_free(png_ptr,str)" in pngrutil.c WinCE
+    code (Yves Piguet).  This bug was introduced in libpng-1.2.14.
+  Updated scripts/CMakeLists.txt
+  Relocated a misplaced #endif in pnggccrd.c
+
+version 1.2.19beta27 [July 17, 2007]
+  Fixed incorrect stride and number of bytes copied (was 4 instead of
+    6 bytes) in the cleanup loop of pnggccrd.c and pngvcrd.c for handling
+    the end of 48-bit interlaced rows (Glenn R-P).
+
+version 1.2.19beta28 [July 19, 2007]
+  Removed requirement for gcc-4.1 or better to use PNG_HAVE_MMX_FILTER_ROW
+    on x86_64 platforms
+  Added png_warning() in pngrutil.c for short iCCP, iTXt, sPLT, or zTXt chunks.
+  Revised pngtest.c so warnings are displayed regardless of PNG_NO_STDIO.
+
+version 1.2.19beta30 [July 26, 2007]
+  Revised pnggccrd.c
+
+version 1.2.19beta31 [July 27, 2007]
+  Fix typos in pnggccrd.c
+
+version 1.2.19beta32 [July 31, 2007]
+  Disable PNG_MMX_CODE_SUPPORTED when PNG_ASSEMBLER_CODE_SUPPORTED is off.
+  Enable PNG_MMX_READ_FILTER_* by default (they were inadvertently disabled in
+    libpng-1.2.19beta23).
+  Fix some debugging statements in pnggccrd.c and pngrutil.c
+  Added information about disabling the MMX code in libpng documentation.
+
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net
+
 (subscription required; visit 
 https://lists.sourceforge.net/lists/listinfo/png-mng-implement
 to subscribe) or to glennrp at users.sourceforge.net
diff --git a/CHANGES b/CHANGES
index 66c5455..447a187 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1469,7 +1469,7 @@
   Restored scripts/makefile.elf which was inadvertently deleted.
 
 version 1.2.9beta6 [March 6, 2006]
-  Fixed typo (26) in configuration files.
+  Fixed typo (27) in configuration files.
 
 version 1.2.9beta7 [March 7, 2006]
   Removed libpng.vers and libpng.sym from libpng12_la_SOURCES in Makefile.am
@@ -1712,12 +1712,152 @@
 version 1.0.25 [May 15, 2007]
 version 1.2.17 [May 15, 2007]
   Added "png_ptr->num_trans=0" before error return in png_handle_tRNS,
-    to eliminate a vulnerability (CVE-2007-2554, CERT VU#684664)
+    to eliminate a vulnerability (CVE-2007-2445, CERT VU#684664)
 
 version 1.0.26 [May 15, 2007]
 version 1.2.18 [May 15, 2007]
   Reverted the libpng-1.2.17rc3 change to symbol-handling in configure script
 
+version 1.2.19beta1 [May 18, 2007]
+  Changed "const static" to "static PNG_CONST" everywhere, mostly undoing
+    change of libpng-1.2.17beta2.  Changed other "const" to "PNG_CONST"
+  Changed some handling of unused parameters, to avoid compiler warnings.
+    "if (unused == NULL) return;" becomes "unused = unused".
+
+version 1.2.19beta2 [May 18, 2007]
+  Only use the valid bits of tRNS value in png_do_expand() (Brian Cartier)
+
+version 1.2.19beta3 [May 19, 2007]
+  Add some "png_byte" typecasts in png_check_keyword() and write new_key
+    instead of key in zTXt chunk (Kevin Ryde).
+
+version 1.2.19beta4 [May 21, 2007]
+  Add png_snprintf() function and use it in place of sprintf() for improved
+    defense against buffer overflows.
+
+version 1.2.19beta5 [May 21, 2007]
+  Fixed png_handle_tRNS() to only use the valid bits of tRNS value.
+  Changed handling of more unused parameters, to avoid compiler warnings.
+  Removed some PNG_CONST in pngwutil.c to avoid compiler warnings.
+
+version 1.2.19beta6 [May 22, 2007]
+  Added some #ifdef PNG_MMX_CODE_SUPPORTED where needed in pngvcrd.c
+  Added a special "_MSC_VER" case that defines png_snprintf to _snprintf
+
+version 1.2.19beta7 [May 22, 2007]
+  Squelched png_squelch_warnings() in pnggccrd.c and added an
+    #ifdef PNG_MMX_CODE_SUPPORTED block around the declarations that caused
+    the warnings that png_squelch_warnings was squelching.
+
+version 1.2.19beta8 [May 22, 2007]
+  Removed __MMX__ from test in pngconf.h.
+
+version 1.2.19beta9 [May 23, 2007]
+  Made png_squelch_warnings() available via PNG_SQUELCH_WARNINGS macro.
+  Revised png_squelch_warnings() so it might work.
+  Updated makefile.sgcc and makefile.solaris; added makefile.solaris-x86.
+
+version 1.2.19beta10 [May 24, 2007]
+  Resquelched png_squelch_warnings(), use "__attribute__((used))" instead.
+
+version 1.2.19beta11 [May 28, 2007]
+  Return 0 from png_get_sPLT() and png_get_unknown_chunks() if png_ptr is NULL;
+    changed three remaining instances of png_strcpy() to png_strncpy() (David
+    Hill).
+  Make test for NULL row_buf at the beginning of png_do_read_transformations
+    unconditional.
+
+version 1.2.19beta12 [May 28, 2007]
+  Revised pnggccrd.c.
+
+version 1.2.19beta13 [June 14, 2007]
+  Prefer PNG_USE_PNGVCRD when _MSC_VER is defined in pngconf.h
+
+version 1.2.19beta14 [June 16, 2007]
+  Fix bug with handling of 16-bit transparency, introduced in 1.2.19beta2
+
+version 1.2.19beta15 [June 17, 2007]
+  Revised pnggccrd.c.
+
+version 1.2.19beta16 [June 18, 2007]
+  Revised pnggccrd.c again.
+  Updated contrib/gregbook.
+  Changed '#include "pnggccrd.c"' to '#include "$srcdir/pnggccrd.c"'
+    in configure.ac
+
+version 1.2.19beta17 [June 19, 2007]
+  Revised many of the makefiles, to set -DPNG_NO_MMX_CODE where needed
+    and to not use -O3 unless -DPNG_NO_MMX_CODE is also set.
+
+version 1.2.19beta18 [June 23, 2007]
+  Replaced some C++ style comments with C style comments in pnggccrd.c.
+  Copied optimized C code from pnggccrd.c to pngrutil.c, removed dependency
+    on pnggccrd.o from many makefiles.
+  Added sl and dylib to list of extensions to be installed by Makefile.am
+
+version 1.2.19beta19 [June 28, 2007]
+  Fixed testing PNG_RGB_TO_GRAY_ERR & PNG_RGB_TO_GRAY_WARN in pngrtran.c
+  More cleanup of pnggccrd.c and pngvcrd.c
+
+version 1.2.19beta20 [June 29, 2007]
+  Rebuilt Makefile.in and configure using libtool-1.5.24.
+  Fixed typo in pnggccrd.c
+
+version 1.2.19beta21 [June 30, 2007]
+  More revision of pnggccrd.c
+  Added "test" target to Makefile.in and Makefile.am
+
+version 1.2.19beta22 [July 3, 2007]
+  Added info about pngrutil/pnggccrd/pngvcrd to png_get_header_version()
+  Fix type definition of dummy_value_a, b in pnggccrd.c
+
+version 1.2.19beta23 [July 10, 2007]
+  Revert change to type definition of dummy_value_a, b in pnggccrd.c
+  Make sure __PIC__ is defined in pnggccrd.c when PIC is defined.
+  Require gcc-4.1 or better to use PNG_HAVE_MMX_FILTER_ROW on x86_64 platforms
+
+version 1.2.19beta24 [July 14, 2007]
+  Added PNG_NO_READ_FILTER, PNG_NO_WRITE_FILTER, PNG_NO_WARNING macros.
+  Added contrib/pngminim to demonstrate building minimal encoder and decoder
+
+version 1.2.19beta25 [July 15, 2007]
+  Removed the new PNG_NO_READ_FILTER macro since it would make the library
+    unable to read valid PNG files, and filtering is at the heart of the
+    PNG format.
+
+version 1.2.19beta26 [July 16, 2007]
+  Changed "png_free(str)" to "png_free(png_ptr,str)" in pngrutil.c WinCE
+    code (Yves Piguet).  This bug was introduced in libpng-1.2.14.
+  Updated scripts/CMakeLists.txt
+  Relocated a misplaced #endif in pnggccrd.c
+
+version 1.2.19beta27 [July 17, 2007]
+  Fixed incorrect stride and number of bytes copied (was 4 instead of
+    6 bytes) in the cleanup loop of pnggccrd.c and pngvcrd.c for handling
+    the end of 48-bit interlaced rows (Glenn R-P).
+
+version 1.2.19beta28 [July 19, 2007]
+  Removed requirement for gcc-4.1 or better to use PNG_HAVE_MMX_FILTER_ROW
+    on x86_64 platforms
+  Added png_warning() in pngrutil.c for short iCCP, iTXt, sPLT, or zTXt chunks.
+  Revised pngtest.c so warnings are displayed regardless of PNG_NO_STDIO.
+
+version 1.2.19beta29 [July 20, 2007]
+  Fix typo in pnggccrd.c (%%eax should be %%ax in secondloop48)
+
+version 1.2.19beta30 [July 26, 2007]
+  Revised pnggccrd.c
+
+version 1.2.19beta31 [July 27, 2007]
+  Fix typos in pnggccrd.c
+
+version 1.0.27rc1 and 1.2.19rc1 [July 31, 2007]
+  Disable PNG_MMX_CODE_SUPPORTED when PNG_ASSEMBLER_CODE_SUPPORTED is off.
+  Enable PNG_MMX_READ_FILTER_* by default, except when gcc-3.x is being
+    used (they were inadvertently disabled in libpng-1.2.19beta23).
+  Fix some debugging statements in pnggccrd.c and pngrutil.c
+  Added information about disabling the MMX code in libpng documentation.
+
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 (subscription required; visit
 https://lists.sourceforge.net/lists/listinfo/png-mng-implement
diff --git a/INSTALL b/INSTALL
index b8c0e4d..bcbbaa4 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,5 +1,5 @@
 
-Installing libpng version 1.0.26 - May 15, 2007
+Installing libpng version 1.0.27rc1 - July 31, 2007
 
 On Unix/Linux and similar systems, you can simply type
 
@@ -44,7 +44,7 @@
 correspond to the version of zlib that's installed.
 
 You can rename the directories that you downloaded (they
-might be called "libpng-1.0.26" or "lpng109" and "zlib-1.2.1"
+might be called "libpng-1.0.27rc1" or "lpng109" and "zlib-1.2.1"
 or "zlib121") so that you have directories called "zlib" and "libpng".
 
 Your directory structure should look like this:
@@ -101,14 +101,14 @@
  CMakeLists.txt    =>  "cmake" script
  makefile.std      =>  Generic UNIX makefile (cc, creates static libpng.a)
  makefile.elf      =>  Linux/ELF makefile symbol versioning,
-                       gcc, creates libpng10.so.0.1.0.26)
+                       gcc, creates libpng10.so.0.1.0.27rc1)
  makefile.linux    =>  Linux/ELF makefile
-                       (gcc, creates libpng10.so.0.1.0.26)
+                       (gcc, creates libpng10.so.0.1.0.27rc1)
  makefile.gcmmx    =>  Linux/ELF makefile
-                       (gcc, creates libpng10.so.0.1.0.26,
+                       (gcc, creates libpng10.so.0.1.0.27rc1,
                        uses assembler code tuned for Intel MMX platform)
  makefile.nommx    =>  Linux/ELF makefile
-                       (gcc, creates libpng10.so.0.1.0.26
+                       (gcc, creates libpng10.so.0.1.0.27rc1,
                        does not use Intel MMX assembler code)
  makefile.gcc      =>  Generic makefile (gcc, creates static libpng.a)
  makefile.knr      =>  Archaic UNIX Makefile that converts files with
@@ -131,12 +131,14 @@
  makefile.openbsd  =>  OpenBSD makefile
  makefile.sgi      =>  Silicon Graphics IRIX makefile (cc, creates static lib)
  makefile.sggcc    =>  Silicon Graphics (gcc,
-                       creates libpng10.so.0.1.0.26)
+                       creates libpng10.so.0.1.0.27rc1)
  makefile.sunos    =>  Sun makefile
  makefile.solaris  =>  Solaris 2.X makefile (gcc,
-                       creates libpng10.so.0.1.0.26)
+                       creates libpng10.so.0.1.0.27rc1)
+ makefile.solaris-x86 =>  Solaris/intelMMX 2.X makefile (gcc,
+                       creates libpng10.so.0.1.0.27rc1)
  makefile.so9      =>  Solaris 9 makefile (gcc,
-                       creates libpng10.so.0.1.0.26)
+                       creates libpng10.so.0.1.0.27rc1)
  makefile.32sunu   =>  Sun Ultra 32-bit makefile
  makefile.64sunu   =>  Sun Ultra 64-bit makefile
  makefile.sco      =>  For SCO OSr5  ELF and Unixware 7 with Native cc
@@ -216,7 +218,7 @@
 scripts/makefile.* yourself, that is)
 
 
-CFLAGS="-Wall -O3 -funroll-loops \
+CFLAGS="-Wall -O -funroll-loops \
 -malign-loops=2 -malign-functions=2" ./configure --prefix=/usr/include \
 --with-pkgconfigdir=/usr/lib/pkgconfig --includedir=/usr/include
 
diff --git a/KNOWNBUG b/KNOWNBUG
index b199b76..0cc60e8 100644
--- a/KNOWNBUG
+++ b/KNOWNBUG
@@ -1,5 +1,5 @@
 
-Known bugs in libpng version 1.0.26
+Known bugs in libpng version 1.0.27rc1
 
 1. April 22, 2001: pnggccrd.c has been reported to crash on NetBSD when
    reading interlaced PNG files, when assembler code is enabled but running
@@ -19,10 +19,11 @@
    libpng12.so => libpng12.so.0.1.2.9betaN
    that are generated by the custom makefiles.
 
-   STATUS: For now, system library builders should use the custom makefiles.
+4. June 28, 2007: pnggccrd.c has been reported to be unreliable on
+   several 64-bit platforms running gcc-3.x.  Although it appears to
+   build properly, it does not always read files correctly.
 
-4. March 2007: Building 1.2.16 with PNG_ASSEMBLER_CODE_SUPPORTED;
-   PNG_MMX_CODE_SUPPORTED results in multiple definitions of png_combine_row,
-   png_do_read_interlace, and png_read_filter_row
+   STATUS:  Appears to be a bug in the 64-bit version of gcc-3.4.6.
+   Workarounds are to define PNG_NO_MMX_FILTER_UP|AVG|PAETH (as in
+   pngconf.h) or to upgrade gcc to a newer version.
 
-   STATUS: Investigating.
diff --git a/LICENSE b/LICENSE
index 6baa6eb..3061906 100644
--- a/LICENSE
+++ b/LICENSE
@@ -8,7 +8,7 @@
 If you modify libpng you may insert additional notices immediately following
 this sentence.
 
-libpng versions 1.2.6, August 15, 2004, through 1.0.26, May 15, 2007, are
+libpng versions 1.2.6, August 15, 2004, through 1.0.27rc1, July 31, 2007, are
 Copyright (c) 2004, 2006-2007 Glenn Randers-Pehrson, and are
 distributed according to the same disclaimer and license as libpng-1.2.5
 with the following individual added to the list of Contributing Authors
@@ -106,4 +106,4 @@
 
 Glenn Randers-Pehrson
 glennrp at users.sourceforge.net
-May 15, 2007
+July 31, 2007
diff --git a/Makefile.am b/Makefile.am
index c332fbd..12fb6b7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -109,6 +109,8 @@
 	$(ECHO) 'local: *; };' >> $@.new
 	mv $@.new $@
 
+test: check
+
 # install the .../include headers as links to the new ones
 install-data-hook:
 	cd $(DESTDIR)$(includedir); rm -f png.h pngconf.h
@@ -123,7 +125,7 @@
 	cd $(DESTDIR)$(bindir); $(LN_S) $(PNGLIB_BASENAME)-config libpng-config
 	@set -x;\
 	cd $(DESTDIR)$(libdir);\
-	for ext in a la so; do\
+	for ext in a la so sl dylib; do\
 		rm -f libpng.$$ext;\
                 if test -f $(PNGLIB_BASENAME).$$ext; then\
                        $(LN_S) $(PNGLIB_BASENAME).$$ext libpng.$$ext;\
@@ -137,7 +139,7 @@
 	@if test -n "@compatlib@"; then\
 		set -x;\
 		cd $(DESTDIR)$(libdir);\
-		for ext in a la so; do\
+		for ext in a la so sl dylib; do\
 			rm -f libpng.$$ext;\
 		done;\
 	fi
diff --git a/Makefile.in b/Makefile.in
index 81eaa2a..15d35ea 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1259,6 +1259,8 @@
 	$(ECHO) 'local: *; };' >> $@.new
 	mv $@.new $@
 
+test: check
+
 # install the .../include headers as links to the new ones
 install-data-hook:
 	cd $(DESTDIR)$(includedir); rm -f png.h pngconf.h
@@ -1273,7 +1275,7 @@
 	cd $(DESTDIR)$(bindir); $(LN_S) $(PNGLIB_BASENAME)-config libpng-config
 	@set -x;\
 	cd $(DESTDIR)$(libdir);\
-	for ext in a la so; do\
+	for ext in a la so sl dylib; do\
 		rm -f libpng.$$ext;\
                 if test -f $(PNGLIB_BASENAME).$$ext; then\
                        $(LN_S) $(PNGLIB_BASENAME).$$ext libpng.$$ext;\
@@ -1287,7 +1289,7 @@
 	@if test -n "@compatlib@"; then\
 		set -x;\
 		cd $(DESTDIR)$(libdir);\
-		for ext in a la so; do\
+		for ext in a la so sl dylib; do\
 			rm -f libpng.$$ext;\
 		done;\
 	fi
diff --git a/README b/README
index 4330030..0f03664 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README for libpng version 1.0.26 - May 15, 2007 (shared library 10.0)
+README for libpng version 1.0.27rc1 - July 31, 2007 (shared library 10.0)
 See the note about version numbers near the top of png.h
 
 See INSTALL for instructions on how to install libpng.
@@ -190,11 +190,11 @@
        descrip.mms      =>  VMS makefile for MMS or MMK
        makefile.std     =>  Generic UNIX makefile (cc, creates static libpng.a)
        makefile.elf     =>  Linux/ELF makefile symbol versioning,
-                            gcc, creates libpng10.so.0.1.0.26)
+                            gcc, creates libpng10.so.0.1.0.27rc1)
        makefile.linux   =>  Linux/ELF makefile
-                            (gcc, creates libpng10.so.0.1.0.26)
+                            (gcc, creates libpng10.so.0.1.0.27rc1)
        makefile.gcmmx   =>  Linux/ELF makefile
-                            (gcc, creates libpng10.so.0.1.0.26,
+                            (gcc, creates libpng10.so.0.1.0.27rc1,
                             uses assembler code tuned for Intel MMX platform)
        makefile.gcc     =>  Generic makefile (gcc, creates static libpng.a)
        makefile.knr     =>  Archaic UNIX Makefile that converts files with
@@ -216,12 +216,12 @@
        makefile.openbsd =>  OpenBSD makefile
        makefile.sgi     =>  Silicon Graphics IRIX (cc, creates static lib)
        makefile.sggcc   =>  Silicon Graphics
-                            (gcc, creates libpng10.so.0.1.0.26)
+                            (gcc, creates libpng10.so.0.1.0.27rc1)
        makefile.sunos   =>  Sun makefile
        makefile.solaris =>  Solaris 2.X makefile
-                            (gcc, creates libpng10.so.0.1.0.26)
+                            (gcc, creates libpng10.so.0.1.0.27rc1)
        makefile.so9     =>  Solaris 9 makefile
-                            (gcc, creates libpng10.so.0.1.0.26)
+                            (gcc, creates libpng10.so.0.1.0.27rc1)
        makefile.32sunu  =>  Sun Ultra 32-bit makefile
        makefile.64sunu  =>  Sun Ultra 64-bit makefile
        makefile.sco     =>  For SCO OSr5  ELF and Unixware 7 with Native cc
diff --git a/Y2KINFO b/Y2KINFO
index a820eae..c20b1b5 100644
--- a/Y2KINFO
+++ b/Y2KINFO
@@ -1,13 +1,13 @@
    Y2K compliance in libpng:
    =========================
 
-      May 15, 2007
+      July 31, 2007
 
       Since the PNG Development group is an ad-hoc body, we can't make
       an official declaration.
 
       This is your unofficial assurance that libpng from version 0.71 and
-      upward through 1.0.26 are Y2K compliant.  It is my belief that earlier
+      upward through 1.0.27rc1 are Y2K compliant.  It is my belief that earlier
       versions were also Y2K compliant.
 
       Libpng only has three year fields.  One is a 2-byte unsigned integer
diff --git a/configure b/configure
index e9fe00c..f674b07 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for libpng 1.0.26.
+# Generated by GNU Autoconf 2.61 for libpng 1.0.27rc1.
 #
 # Report bugs to <png-mng-implement@lists.sourceforge.net>.
 #
@@ -728,8 +728,8 @@
 # Identity of this package.
 PACKAGE_NAME='libpng'
 PACKAGE_TARNAME='libpng'
-PACKAGE_VERSION='1.0.26'
-PACKAGE_STRING='libpng 1.0.26'
+PACKAGE_VERSION='1.0.27rc1'
+PACKAGE_STRING='libpng 1.0.27rc1'
 PACKAGE_BUGREPORT='png-mng-implement@lists.sourceforge.net'
 
 ac_unique_file="pngget.c"
@@ -846,6 +846,7 @@
 CCDEPMODE
 am__fastdepCC_TRUE
 am__fastdepCC_FALSE
+SED
 build
 build_cpu
 build_vendor
@@ -857,7 +858,6 @@
 GREP
 EGREP
 CPP
-SED
 LN_S
 ECHO
 AR
@@ -1405,7 +1405,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures libpng 1.0.26 to adapt to many kinds of systems.
+\`configure' configures libpng 1.0.27rc1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1475,7 +1475,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of libpng 1.0.26:";;
+     short | recursive ) echo "Configuration of libpng 1.0.27rc1:";;
    esac
   cat <<\_ACEOF
 
@@ -1585,7 +1585,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-libpng configure 1.0.26
+libpng configure 1.0.27rc1
 generated by GNU Autoconf 2.61
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1599,7 +1599,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by libpng $as_me 1.0.26, which was
+It was created by libpng $as_me 1.0.27rc1, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   $ $0 $@
@@ -2269,7 +2269,7 @@
 
 # Define the identity of the package.
  PACKAGE='libpng'
- VERSION='1.0.26'
+ VERSION='1.0.27rc1'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2440,10 +2440,10 @@
 
 
 
-PNGLIB_VERSION=1.0.26
+PNGLIB_VERSION=1.0.27rc1
 PNGLIB_MAJOR=1
 PNGLIB_MINOR=0
-PNGLIB_RELEASE=26
+PNGLIB_RELEASE=27
 
 
 
@@ -3558,12 +3558,13 @@
   test -z "$as_dir" && as_dir=.
   for lt_ac_prog in sed gsed; do
     for ac_exec_ext in '' $ac_executable_extensions; do
-      if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
+      if { test -f "$as_dir/$lt_ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$lt_ac_prog$ac_exec_ext"; }; then
         lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
       fi
     done
   done
 done
+IFS=$as_save_IFS
 lt_ac_max=0
 lt_ac_count=0
 # Add /usr/xpg4/bin/sed as it is typically found on Solaris
@@ -3598,6 +3599,7 @@
 fi
 
 SED=$lt_cv_path_SED
+
 { echo "$as_me:$LINENO: result: $SED" >&5
 echo "${ECHO_T}$SED" >&6; }
 
@@ -4550,8 +4552,8 @@
 echo "${ECHO_T}$lt_cv_path_NM" >&6; }
 NM="$lt_cv_path_NM"
 
-{ echo "$as_me:$LINENO: checking how to recognise dependent libraries" >&5
-echo $ECHO_N "checking how to recognise dependent libraries... $ECHO_C" >&6; }
+{ echo "$as_me:$LINENO: checking how to recognize dependent libraries" >&5
+echo $ECHO_N "checking how to recognize dependent libraries... $ECHO_C" >&6; }
 if test "${lt_cv_deplibs_check_method+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
 else
@@ -4592,16 +4594,22 @@
 
 mingw* | pw32*)
   # Base MSYS/MinGW do not provide the 'file' command needed by
-  # func_win32_libid shell function, so use a weaker test based on 'objdump'.
-  lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
-  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  if ( file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
   ;;
 
 darwin* | rhapsody*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-freebsd* | kfreebsd*-gnu | dragonfly*)
+freebsd* | dragonfly*)
   if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
     case $host_cpu in
     i*86 )
@@ -4639,7 +4647,7 @@
   esac
   ;;
 
-interix3*)
+interix[3-9]*)
   # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
   lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$'
   ;;
@@ -4655,7 +4663,7 @@
   ;;
 
 # This must be Linux ELF.
-linux*)
+linux* | k*bsd*-gnu)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
@@ -4689,6 +4697,10 @@
   lt_cv_deplibs_check_method=pass_all
   ;;
 
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
 solaris*)
   lt_cv_deplibs_check_method=pass_all
   ;;
@@ -4776,7 +4788,7 @@
   ;;
 *-*-irix6*)
   # Find out which ABI we are using.
-  echo '#line 4779 "configure"' > conftest.$ac_ext
+  echo '#line 4791 "configure"' > conftest.$ac_ext
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
   ac_status=$?
@@ -4811,7 +4823,8 @@
   rm -rf conftest*
   ;;
 
-x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|sparc*-*linux*)
   # Find out which ABI we are using.
   echo 'int i;' > conftest.$ac_ext
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
@@ -4822,6 +4835,9 @@
     case `/usr/bin/file conftest.o` in
     *32-bit*)
       case $host in
+        x86_64-*kfreebsd*-gnu)
+          LD="${LD-ld} -m elf_i386_fbsd"
+          ;;
         x86_64-*linux*)
           LD="${LD-ld} -m elf_i386"
           ;;
@@ -4838,6 +4854,9 @@
       ;;
     *64-bit*)
       case $host in
+        x86_64-*kfreebsd*-gnu)
+          LD="${LD-ld} -m elf_x86_64_fbsd"
+          ;;
         x86_64-*linux*)
           LD="${LD-ld} -m elf_x86_64"
           ;;
@@ -6401,24 +6420,27 @@
     fi
     ;;
   *)
-    # If test is not a shell built-in, we'll probably end up computing a
-    # maximum length that is only half of the actual maximum length, but
-    # we can't tell.
-    SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
-    while (test "X"`$SHELL $0 --fallback-echo "X$teststring" 2>/dev/null` \
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      while (test "X"`$SHELL $0 --fallback-echo "X$teststring" 2>/dev/null` \
 	       = "XX$teststring") >/dev/null 2>&1 &&
-	    new_result=`expr "X$teststring" : ".*" 2>&1` &&
-	    lt_cv_sys_max_cmd_len=$new_result &&
-	    test $i != 17 # 1/2 MB should be enough
-    do
-      i=`expr $i + 1`
-      teststring=$teststring$teststring
-    done
-    teststring=
-    # Add a significant safety factor because C++ compilers can tack on massive
-    # amounts of additional arguments before passing them to the linker.
-    # It appears as though 1/2 is a usable value.
-    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+	      new_result=`expr "X$teststring" : ".*" 2>&1` &&
+	      lt_cv_sys_max_cmd_len=$new_result &&
+	      test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on massive
+      # amounts of additional arguments before passing them to the linker.
+      # It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
     ;;
   esac
 
@@ -6435,6 +6457,7 @@
 
 
 
+
 # Check for command to grab the raw symbol name followed by C symbol from nm.
 { echo "$as_me:$LINENO: checking command to parse $NM output from $compiler object" >&5
 echo $ECHO_N "checking command to parse $NM output from $compiler object... $ECHO_C" >&6; }
@@ -6472,7 +6495,7 @@
   lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
   lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/  {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"\2\", (lt_ptr) \&\2},/p'"
   ;;
-linux*)
+linux* | k*bsd*-gnu)
   if test "$host_cpu" = ia64; then
     symcode='[ABCDGIRSTW]'
     lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
@@ -7023,7 +7046,7 @@
 test -z "$ac_objext" && ac_objext=o
 
 # Determine commands to create old-style static archives.
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs'
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
 old_postinstall_cmds='chmod 644 $oldlib'
 old_postuninstall_cmds=
 
@@ -7223,10 +7246,10 @@
 objext=$objext
 
 # Code to be used in simple compile tests
-lt_simple_compile_test_code="int some_variable = 0;\n"
+lt_simple_compile_test_code="int some_variable = 0;"
 
 # Code to be used in simple link tests
-lt_simple_link_test_code='int main(){return(0);}\n'
+lt_simple_link_test_code='int main(){return(0);}'
 
 
 # If no C compiler was specified, use CC.
@@ -7241,13 +7264,13 @@
 
 # save warnings/boilerplate of simple test code
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
 eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_compiler_boilerplate=`cat conftest.err`
 $rm conftest*
 
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_link_test_code" >conftest.$ac_ext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
 eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_linker_boilerplate=`cat conftest.err`
 $rm conftest*
@@ -7267,7 +7290,7 @@
 else
   lt_cv_prog_compiler_rtti_exceptions=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="-fno-rtti -fno-exceptions"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -7278,11 +7301,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7281: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7304: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:7285: \$? = $ac_status" >&5
+   echo "$as_me:7308: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -7333,13 +7356,15 @@
       lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
       ;;
 
-    beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
       # PIC is the default for these OSes.
       ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
       lt_prog_compiler_pic='-DDLL_EXPORT'
       ;;
 
@@ -7349,7 +7374,7 @@
       lt_prog_compiler_pic='-fno-common'
       ;;
 
-    interix3*)
+    interix[3-9]*)
       # Interix 3.x gcc -fpic/-fPIC options generate broken code.
       # Instead, we relocate shared libraries at runtime.
       ;;
@@ -7407,7 +7432,7 @@
        esac
        ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
       lt_prog_compiler_pic='-DDLL_EXPORT'
@@ -7440,7 +7465,7 @@
       lt_prog_compiler_static='-Bstatic'
       ;;
 
-    linux*)
+    linux* | k*bsd*-gnu)
       case $cc_basename in
       icc* | ecc*)
 	lt_prog_compiler_wl='-Wl,'
@@ -7459,6 +7484,22 @@
         # All Alpha code is PIC.
         lt_prog_compiler_static='-non_shared'
         ;;
+      *)
+        case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)
+	  # Sun C 5.9
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl='-Wl,'
+	  ;;
+	*Sun\ F*)
+	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
+	  lt_prog_compiler_pic='-KPIC'
+	  lt_prog_compiler_static='-Bstatic'
+	  lt_prog_compiler_wl=''
+	  ;;
+	esac
+	;;
       esac
       ;;
 
@@ -7468,6 +7509,10 @@
       lt_prog_compiler_static='-non_shared'
       ;;
 
+    rdos*)
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
     solaris*)
       lt_prog_compiler_pic='-KPIC'
       lt_prog_compiler_static='-Bstatic'
@@ -7535,7 +7580,7 @@
 else
   lt_prog_compiler_pic_works=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -7546,11 +7591,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7549: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7594: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:7553: \$? = $ac_status" >&5
+   echo "$as_me:7598: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -7599,7 +7644,7 @@
   lt_prog_compiler_static_works=no
    save_LDFLAGS="$LDFLAGS"
    LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
-   printf "$lt_simple_link_test_code" > conftest.$ac_ext
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
    if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
      # The linker can only warn and ignore the option if not recognized
      # So say no if there are warnings
@@ -7639,7 +7684,7 @@
    mkdir conftest
    cd conftest
    mkdir out
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
    lt_compiler_flag="-o out/conftest2.$ac_objext"
    # Insert the option either (1) after the last *FLAGS variable, or
@@ -7650,11 +7695,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7653: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7698: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:7657: \$? = $ac_status" >&5
+   echo "$as_me:7702: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -7846,7 +7891,7 @@
       allow_undefined_flag=unsupported
       always_export_symbols=no
       enable_shared_with_static_runtimes=yes
-      export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS] /s/.* \([^ ]*\)/\1 DATA/'\'' | $SED -e '\''/^[AITW] /s/.* //'\'' | sort | uniq > $export_symbols'
+      export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/'\'' -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
 
       if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
         archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
@@ -7864,7 +7909,7 @@
       fi
       ;;
 
-    interix3*)
+    interix[3-9]*)
       hardcode_direct=no
       hardcode_shlibpath_var=no
       hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
@@ -7879,7 +7924,7 @@
       archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
       ;;
 
-    linux*)
+    gnu* | linux* | k*bsd*-gnu)
       if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
 	tmp_addflag=
 	case $cc_basename,$host_cpu in
@@ -7897,13 +7942,22 @@
 	ifc* | ifort*)			# Intel Fortran compiler
 	  tmp_addflag=' -nofor_main' ;;
 	esac
-	archive_cmds='$CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)			# Sun C 5.9
+	  whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_sharedflag='-G' ;;
+	*Sun\ F*)			# Sun Fortran 8.3
+	  tmp_sharedflag='-G' ;;
+	*)
+	  tmp_sharedflag='-shared' ;;
+	esac
+	archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
 
 	if test $supports_anon_versioning = yes; then
 	  archive_expsym_cmds='$echo "{ global:" > $output_objdir/$libname.ver~
   cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
   $echo "local: *; };" >> $output_objdir/$libname.ver~
-	  $CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+	  $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
 	fi
       else
 	ld_shlibs=no
@@ -8062,7 +8116,7 @@
   	   strings "$collect2name" | grep resolve_lib_name >/dev/null
 	  then
   	  # We have reworked collect2
-  	  hardcode_direct=yes
+  	  :
 	  else
   	  # We have old collect2
   	  hardcode_direct=unsupported
@@ -8136,11 +8190,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -8195,11 +8256,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -8253,7 +8321,7 @@
       # The linker will automatically build a .lib file if we build a DLL.
       old_archive_From_new_cmds='true'
       # FIXME: Should let the user specify the lib program.
-      old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs'
+      old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs'
       fix_srcfile_path='`cygpath -w "$srcfile"`'
       enable_shared_with_static_runtimes=yes
       ;;
@@ -8295,10 +8363,10 @@
       case $cc_basename in
         xlc*)
          output_verbose_link_cmd='echo'
-         archive_cmds='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+         archive_cmds='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring'
          module_cmds='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
           # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
-         archive_expsym_cmds='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+         archive_expsym_cmds='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           module_expsym_cmds='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag  -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           ;;
        *)
@@ -8338,7 +8406,7 @@
       ;;
 
     # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
-    freebsd* | kfreebsd*-gnu | dragonfly*)
+    freebsd* | dragonfly*)
       archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
       hardcode_libdir_flag_spec='-R$libdir'
       hardcode_direct=yes
@@ -8460,24 +8528,28 @@
       ;;
 
     openbsd*)
-      hardcode_direct=yes
-      hardcode_shlibpath_var=no
-      if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-	archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
-	hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
-	export_dynamic_flag_spec='${wl}-E'
+      if test -f /usr/libexec/ld.so; then
+	hardcode_direct=yes
+	hardcode_shlibpath_var=no
+	if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	  archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+	  hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+	  export_dynamic_flag_spec='${wl}-E'
+	else
+	  case $host_os in
+	   openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+	     archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+	     hardcode_libdir_flag_spec='-R$libdir'
+	     ;;
+	   *)
+	     archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	     hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+	     ;;
+	  esac
+        fi
       else
-       case $host_os in
-	 openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
-	   archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-	   hardcode_libdir_flag_spec='-R$libdir'
-	   ;;
-	 *)
-	   archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	   hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
-	   ;;
-       esac
+	ld_shlibs=no
       fi
       ;;
 
@@ -8536,17 +8608,16 @@
       case $host_os in
       solaris2.[0-5] | solaris2.[0-5].*) ;;
       *)
- 	# The compiler driver will combine linker options so we
- 	# cannot just pass the convience library names through
- 	# without $wl, iff we do not link with $LD.
- 	# Luckily, gcc supports the same syntax we need for Sun Studio.
+	# The compiler driver will combine and reorder linker options,
+	# but understands `-z linker_flag'.  GCC discards it without `$wl',
+	# but is careful enough not to reorder.
  	# Supported since Solaris 2.6 (maybe 2.5.1?)
- 	case $wlarc in
- 	'')
- 	  whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
- 	*)
- 	  whole_archive_flag_spec='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' ;;
- 	esac ;;
+	if test "$GCC" = yes; then
+	  whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	else
+	  whole_archive_flag_spec='-z allextract$convenience -z defaultextract'
+	fi
+	;;
       esac
       link_all_deplibs=yes
       ;;
@@ -8603,7 +8674,7 @@
       fi
       ;;
 
-    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7*)
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
       no_undefined_flag='${wl}-z,text'
       archive_cmds_need_lc=no
       hardcode_shlibpath_var=no
@@ -8680,7 +8751,7 @@
       { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5
 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; }
       $rm conftest*
-      printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+      echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
       if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
@@ -8738,17 +8809,55 @@
 version_type=none
 dynamic_linker="$host_os ld.so"
 sys_lib_dlsearch_path_spec="/lib /usr/lib"
+
 if test "$GCC" = yes; then
-  sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
-  if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
+    *) lt_awk_arg="/^libraries:/" ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e "s,=/,/,g"`
+  if echo "$lt_search_path_spec" | grep ';' >/dev/null ; then
     # if the path contains ";" then we assume it to be the separator
     # otherwise default to the standard path separator (i.e. ":") - it is
     # assumed that no part of a normal pathname contains ";" but that should
     # okay in the real world where ";" in dirpaths is itself problematic.
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+    lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e 's/;/ /g'`
   else
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED  -e "s/$PATH_SEPARATOR/ /g"`
+    lt_search_path_spec=`echo "$lt_search_path_spec" | $SED  -e "s/$PATH_SEPARATOR/ /g"`
   fi
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
+    else
+      test -d "$lt_sys_path" && \
+	lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`echo $lt_tmp_lt_search_path_spec | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[lt_foo]++; }
+  if (lt_freq[lt_foo] == 1) { print lt_foo; }
+}'`
+  sys_lib_search_path_spec=`echo $lt_search_path_spec`
 else
   sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
 fi
@@ -8908,12 +9017,8 @@
   shlibpath_overrides_runpath=yes
   shlibpath_var=DYLD_LIBRARY_PATH
   shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-  # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same.
-  if test "$GCC" = yes; then
-    sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
-  else
-    sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
-  fi
+
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"
   sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
   ;;
 
@@ -8930,18 +9035,6 @@
   dynamic_linker=no
   ;;
 
-kfreebsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 freebsd* | dragonfly*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
@@ -8979,7 +9072,7 @@
     shlibpath_overrides_runpath=no
     hardcode_into_libs=yes
     ;;
-  freebsd*) # from 4.6 on
+  *) # from 4.6 on, and DragonFly
     shlibpath_overrides_runpath=yes
     hardcode_into_libs=yes
     ;;
@@ -9042,7 +9135,7 @@
   postinstall_cmds='chmod 555 $lib'
   ;;
 
-interix3*)
+interix[3-9]*)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -9097,7 +9190,7 @@
   ;;
 
 # This must be Linux ELF.
-linux*)
+linux* | k*bsd*-gnu)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -9113,7 +9206,7 @@
 
   # Append ld.so.conf contents to the search path
   if test -f /etc/ld.so.conf; then
-    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ 	]*hwcap[ 	]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
     sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
   fi
 
@@ -9126,18 +9219,6 @@
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
-knetbsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -9219,6 +9300,10 @@
   sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
   ;;
 
+rdos*)
+  dynamic_linker=no
+  ;;
+
 solaris*)
   version_type=linux
   need_lib_prefix=no
@@ -9372,6 +9457,7 @@
    darwin*)
        if test -n "$STRIP" ; then
          striplib="$STRIP -x"
+         old_striplib="$STRIP -S"
          { echo "$as_me:$LINENO: result: yes" >&5
 echo "${ECHO_T}yes" >&6; }
        else
@@ -9958,7 +10044,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 9961 "configure"
+#line 10047 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -10058,7 +10144,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10061 "configure"
+#line 10147 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -10256,6 +10342,7 @@
     module_cmds \
     module_expsym_cmds \
     lt_cv_prog_compiler_c_o \
+    fix_srcfile_path \
     exclude_expsyms \
     include_expsyms; do
 
@@ -10300,7 +10387,7 @@
 # Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
 # NOTE: Changes made to this file will be lost: look at ltmain.sh.
 #
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
 # Free Software Foundation, Inc.
 #
 # This file is part of GNU Libtool:
@@ -10624,7 +10711,7 @@
 sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
 
 # Fix the shell variable \$srcfile for the compiler.
-fix_srcfile_path="$fix_srcfile_path"
+fix_srcfile_path=$lt_fix_srcfile_path
 
 # Set to yes if exported symbols are required.
 always_export_symbols=$always_export_symbols
@@ -10793,10 +10880,10 @@
 objext_CXX=$objext
 
 # Code to be used in simple compile tests
-lt_simple_compile_test_code="int some_variable = 0;\n"
+lt_simple_compile_test_code="int some_variable = 0;"
 
 # Code to be used in simple link tests
-lt_simple_link_test_code='int main(int, char *[]) { return(0); }\n'
+lt_simple_link_test_code='int main(int, char *[]) { return(0); }'
 
 # ltmain only uses $CC for tagged configurations so make sure $CC is set.
 
@@ -10812,13 +10899,13 @@
 
 # save warnings/boilerplate of simple test code
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
 eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_compiler_boilerplate=`cat conftest.err`
 $rm conftest*
 
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_link_test_code" >conftest.$ac_ext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
 eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_linker_boilerplate=`cat conftest.err`
 $rm conftest*
@@ -11077,7 +11164,7 @@
 	   strings "$collect2name" | grep resolve_lib_name >/dev/null
 	then
 	  # We have reworked collect2
-	  hardcode_direct_CXX=yes
+	  :
 	else
 	  # We have old collect2
 	  hardcode_direct_CXX=unsupported
@@ -11151,11 +11238,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -11211,11 +11305,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -11334,10 +11435,10 @@
       case $cc_basename in
         xlc*)
          output_verbose_link_cmd='echo'
-          archive_cmds_CXX='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+          archive_cmds_CXX='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring'
           module_cmds_CXX='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
           # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
-          archive_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+          archive_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           module_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag  -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           ;;
        *)
@@ -11371,7 +11472,7 @@
   freebsd-elf*)
     archive_cmds_need_lc_CXX=no
     ;;
-  freebsd* | kfreebsd*-gnu | dragonfly*)
+  freebsd* | dragonfly*)
     # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
     # conventions
     ld_shlibs_CXX=yes
@@ -11420,9 +11521,7 @@
       hardcode_libdir_separator_CXX=:
 
       case $host_cpu in
-      hppa*64*|ia64*)
-	hardcode_libdir_flag_spec_ld_CXX='+b $libdir'
-        ;;
+      hppa*64*|ia64*) ;;
       *)
 	export_dynamic_flag_spec_CXX='${wl}-E'
         ;;
@@ -11490,7 +11589,7 @@
 	;;
     esac
     ;;
-  interix3*)
+  interix[3-9]*)
     hardcode_direct_CXX=no
     hardcode_shlibpath_var_CXX=no
     hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
@@ -11530,7 +11629,7 @@
     hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
     hardcode_libdir_separator_CXX=:
     ;;
-  linux*)
+  linux* | k*bsd*-gnu)
     case $cc_basename in
       KCC*)
 	# Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -11610,6 +11709,29 @@
 	# dependencies.
 	output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
 	;;
+      *)
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)
+	  # Sun C++ 5.9
+	  no_undefined_flag_CXX=' -zdefs'
+	  archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+	  archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
+	  hardcode_libdir_flag_spec_CXX='-R$libdir'
+	  whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+
+	  # Not sure whether something based on
+	  # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
+	  # would be better.
+	  output_verbose_link_cmd='echo'
+
+	  # Archives containing C++ object files must be created using
+	  # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+	  # necessary to make sure instantiated templates are included
+	  # in the archive.
+	  old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
+	  ;;
+	esac
+	;;
     esac
     ;;
   lynxos*)
@@ -11648,16 +11770,20 @@
     ld_shlibs_CXX=no
     ;;
   openbsd*)
-    hardcode_direct_CXX=yes
-    hardcode_shlibpath_var_CXX=no
-    archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
-    hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
-    if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-      archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
-      export_dynamic_flag_spec_CXX='${wl}-E'
-      whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    if test -f /usr/libexec/ld.so; then
+      hardcode_direct_CXX=yes
+      hardcode_shlibpath_var_CXX=no
+      archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+      hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
+      if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+	export_dynamic_flag_spec_CXX='${wl}-E'
+	whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+      fi
+      output_verbose_link_cmd='echo'
+    else
+      ld_shlibs_CXX=no
     fi
-    output_verbose_link_cmd='echo'
     ;;
   osf3*)
     case $cc_basename in
@@ -11819,15 +11945,10 @@
 	case $host_os in
 	  solaris2.[0-5] | solaris2.[0-5].*) ;;
 	  *)
-	    # The C++ compiler is used as linker so we must use $wl
-	    # flag to pass the commands to the underlying system
-	    # linker. We must also pass each convience library through
-	    # to the system linker between allextract/defaultextract.
-	    # The C++ compiler will combine linker options so we
-	    # cannot just pass the convience library names through
-	    # without $wl.
+	    # The compiler driver will combine and reorder linker options,
+	    # but understands `-z linker_flag'.
 	    # Supported since Solaris 2.6 (maybe 2.5.1?)
-	    whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract'
+	    whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract'
 	    ;;
 	esac
 	link_all_deplibs_CXX=yes
@@ -11874,6 +11995,12 @@
 	  fi
 
 	  hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir'
+	  case $host_os in
+	  solaris2.[0-5] | solaris2.[0-5].*) ;;
+	  *)
+	    whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	    ;;
+	  esac
 	fi
 	;;
     esac
@@ -12065,7 +12192,7 @@
 
 # PORTME: override above test on systems where it is broken
 case $host_os in
-interix3*)
+interix[3-9]*)
   # Interix 3.5 installs completely hosed .la files for C++, so rather than
   # hack all around it, let's just trust "g++" to DTRT.
   predep_objects_CXX=
@@ -12073,13 +12200,46 @@
   postdeps_CXX=
   ;;
 
+linux*)
+  case `$CC -V 2>&1 | sed 5q` in
+  *Sun\ C*)
+    # Sun C++ 5.9
+    #
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
+    ;;
+  esac
+  ;;
+
 solaris*)
   case $cc_basename in
   CC*)
+    # The more standards-conforming stlport4 library is
+    # incompatible with the Cstd library. Avoid specifying
+    # it if it's in CXXFLAGS. Ignore libCrun as
+    # -library=stlport4 depends on it.
+    case " $CXX $CXXFLAGS " in
+    *" -library=stlport4 "*)
+      solaris_use_stlport4=yes
+      ;;
+    esac
+
     # Adding this requires a known-good setup of shared libraries for
     # Sun compiler versions before 5.6, else PIC objects from an old
     # archive will be linked into the output, leading to subtle bugs.
-    postdeps_CXX='-lCstd -lCrun'
+    if test "$solaris_use_stlport4" != yes; then
+      postdeps_CXX='-library=Cstd -library=Crun'
+    fi
     ;;
   esac
   ;;
@@ -12116,12 +12276,14 @@
       # like `-m68040'.
       lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4'
       ;;
-    beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
       # PIC is the default for these OSes.
       ;;
-    mingw* | os2* | pw32*)
+    mingw* | cygwin* | os2* | pw32*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
       lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
       ;;
     darwin* | rhapsody*)
@@ -12133,7 +12295,7 @@
       # DJGPP does not support shared libraries at all
       lt_prog_compiler_pic_CXX=
       ;;
-    interix3*)
+    interix[3-9]*)
       # Interix 3.x gcc -fpic/-fPIC options generate broken code.
       # Instead, we relocate shared libraries at runtime.
       ;;
@@ -12199,7 +12361,7 @@
 	    ;;
 	esac
 	;;
-      freebsd* | kfreebsd*-gnu | dragonfly*)
+      freebsd* | dragonfly*)
 	# FreeBSD uses GNU C++
 	;;
       hpux9* | hpux10* | hpux11*)
@@ -12242,7 +12404,7 @@
 	    ;;
 	esac
 	;;
-      linux*)
+      linux* | k*bsd*-gnu)
 	case $cc_basename in
 	  KCC*)
 	    # KAI C++ Compiler
@@ -12269,6 +12431,14 @@
 	    lt_prog_compiler_static_CXX='-non_shared'
 	    ;;
 	  *)
+	    case `$CC -V 2>&1 | sed 5q` in
+	    *Sun\ C*)
+	      # Sun C++ 5.9
+	      lt_prog_compiler_pic_CXX='-KPIC'
+	      lt_prog_compiler_static_CXX='-Bstatic'
+	      lt_prog_compiler_wl_CXX='-Qoption ld '
+	      ;;
+	    esac
 	    ;;
 	esac
 	;;
@@ -12383,7 +12553,7 @@
 else
   lt_prog_compiler_pic_works_CXX=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -12394,11 +12564,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:12397: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:12567: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:12401: \$? = $ac_status" >&5
+   echo "$as_me:12571: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -12447,7 +12617,7 @@
   lt_prog_compiler_static_works_CXX=no
    save_LDFLAGS="$LDFLAGS"
    LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
-   printf "$lt_simple_link_test_code" > conftest.$ac_ext
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
    if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
      # The linker can only warn and ignore the option if not recognized
      # So say no if there are warnings
@@ -12487,7 +12657,7 @@
    mkdir conftest
    cd conftest
    mkdir out
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
    lt_compiler_flag="-o out/conftest2.$ac_objext"
    # Insert the option either (1) after the last *FLAGS variable, or
@@ -12498,11 +12668,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:12501: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:12671: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:12505: \$? = $ac_status" >&5
+   echo "$as_me:12675: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -12568,7 +12738,7 @@
     export_symbols_cmds_CXX="$ltdll_cmds"
   ;;
   cygwin* | mingw*)
-    export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS] /s/.* \([^ ]*\)/\1 DATA/;/^.* __nm__/s/^.* __nm__\([^ ]*\) [^ ]*/\1 DATA/;/^I /d;/^[AITW] /s/.* //'\'' | sort | uniq > $export_symbols'
+    export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;/^.*[ ]__nm__/s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
   ;;
   *)
     export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
@@ -12599,7 +12769,7 @@
       { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5
 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; }
       $rm conftest*
-      printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+      echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
       if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
@@ -12657,20 +12827,7 @@
 version_type=none
 dynamic_linker="$host_os ld.so"
 sys_lib_dlsearch_path_spec="/lib /usr/lib"
-if test "$GCC" = yes; then
-  sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
-  if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
-    # if the path contains ";" then we assume it to be the separator
-    # otherwise default to the standard path separator (i.e. ":") - it is
-    # assumed that no part of a normal pathname contains ";" but that should
-    # okay in the real world where ";" in dirpaths is itself problematic.
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
-  else
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED  -e "s/$PATH_SEPARATOR/ /g"`
-  fi
-else
-  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi
+
 need_lib_prefix=unknown
 hardcode_into_libs=no
 
@@ -12827,12 +12984,7 @@
   shlibpath_overrides_runpath=yes
   shlibpath_var=DYLD_LIBRARY_PATH
   shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-  # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same.
-  if test "$GCC" = yes; then
-    sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
-  else
-    sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
-  fi
+
   sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
   ;;
 
@@ -12849,18 +13001,6 @@
   dynamic_linker=no
   ;;
 
-kfreebsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 freebsd* | dragonfly*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
@@ -12898,7 +13038,7 @@
     shlibpath_overrides_runpath=no
     hardcode_into_libs=yes
     ;;
-  freebsd*) # from 4.6 on
+  *) # from 4.6 on, and DragonFly
     shlibpath_overrides_runpath=yes
     hardcode_into_libs=yes
     ;;
@@ -12961,7 +13101,7 @@
   postinstall_cmds='chmod 555 $lib'
   ;;
 
-interix3*)
+interix[3-9]*)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -13016,7 +13156,7 @@
   ;;
 
 # This must be Linux ELF.
-linux*)
+linux* | k*bsd*-gnu)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -13032,7 +13172,7 @@
 
   # Append ld.so.conf contents to the search path
   if test -f /etc/ld.so.conf; then
-    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ 	]*hwcap[ 	]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
     sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
   fi
 
@@ -13045,18 +13185,6 @@
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
-knetbsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -13138,6 +13266,10 @@
   sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
   ;;
 
+rdos*)
+  dynamic_linker=no
+  ;;
+
 solaris*)
   version_type=linux
   need_lib_prefix=no
@@ -13332,6 +13464,7 @@
     module_cmds_CXX \
     module_expsym_cmds_CXX \
     lt_cv_prog_compiler_c_o_CXX \
+    fix_srcfile_path_CXX \
     exclude_expsyms_CXX \
     include_expsyms_CXX; do
 
@@ -13652,7 +13785,7 @@
 sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
 
 # Fix the shell variable \$srcfile for the compiler.
-fix_srcfile_path="$fix_srcfile_path_CXX"
+fix_srcfile_path=$lt_fix_srcfile_path
 
 # Set to yes if exported symbols are required.
 always_export_symbols=$always_export_symbols_CXX
@@ -13743,10 +13876,17 @@
 objext_F77=$objext
 
 # Code to be used in simple compile tests
-lt_simple_compile_test_code="      subroutine t\n      return\n      end\n"
+lt_simple_compile_test_code="\
+      subroutine t
+      return
+      end
+"
 
 # Code to be used in simple link tests
-lt_simple_link_test_code="      program t\n      end\n"
+lt_simple_link_test_code="\
+      program t
+      end
+"
 
 # ltmain only uses $CC for tagged configurations so make sure $CC is set.
 
@@ -13762,13 +13902,13 @@
 
 # save warnings/boilerplate of simple test code
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
 eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_compiler_boilerplate=`cat conftest.err`
 $rm conftest*
 
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_link_test_code" >conftest.$ac_ext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
 eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_linker_boilerplate=`cat conftest.err`
 $rm conftest*
@@ -13855,13 +13995,15 @@
       lt_prog_compiler_pic_F77='-m68020 -resident32 -malways-restore-a4'
       ;;
 
-    beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
       # PIC is the default for these OSes.
       ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
       lt_prog_compiler_pic_F77='-DDLL_EXPORT'
       ;;
 
@@ -13871,7 +14013,7 @@
       lt_prog_compiler_pic_F77='-fno-common'
       ;;
 
-    interix3*)
+    interix[3-9]*)
       # Interix 3.x gcc -fpic/-fPIC options generate broken code.
       # Instead, we relocate shared libraries at runtime.
       ;;
@@ -13929,7 +14071,7 @@
        esac
        ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
       lt_prog_compiler_pic_F77='-DDLL_EXPORT'
@@ -13962,7 +14104,7 @@
       lt_prog_compiler_static_F77='-Bstatic'
       ;;
 
-    linux*)
+    linux* | k*bsd*-gnu)
       case $cc_basename in
       icc* | ecc*)
 	lt_prog_compiler_wl_F77='-Wl,'
@@ -13981,6 +14123,22 @@
         # All Alpha code is PIC.
         lt_prog_compiler_static_F77='-non_shared'
         ;;
+      *)
+        case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)
+	  # Sun C 5.9
+	  lt_prog_compiler_pic_F77='-KPIC'
+	  lt_prog_compiler_static_F77='-Bstatic'
+	  lt_prog_compiler_wl_F77='-Wl,'
+	  ;;
+	*Sun\ F*)
+	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
+	  lt_prog_compiler_pic_F77='-KPIC'
+	  lt_prog_compiler_static_F77='-Bstatic'
+	  lt_prog_compiler_wl_F77=''
+	  ;;
+	esac
+	;;
       esac
       ;;
 
@@ -13990,6 +14148,10 @@
       lt_prog_compiler_static_F77='-non_shared'
       ;;
 
+    rdos*)
+      lt_prog_compiler_static_F77='-non_shared'
+      ;;
+
     solaris*)
       lt_prog_compiler_pic_F77='-KPIC'
       lt_prog_compiler_static_F77='-Bstatic'
@@ -14057,7 +14219,7 @@
 else
   lt_prog_compiler_pic_works_F77=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic_F77"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -14068,11 +14230,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14071: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14233: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:14075: \$? = $ac_status" >&5
+   echo "$as_me:14237: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -14121,7 +14283,7 @@
   lt_prog_compiler_static_works_F77=no
    save_LDFLAGS="$LDFLAGS"
    LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
-   printf "$lt_simple_link_test_code" > conftest.$ac_ext
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
    if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
      # The linker can only warn and ignore the option if not recognized
      # So say no if there are warnings
@@ -14161,7 +14323,7 @@
    mkdir conftest
    cd conftest
    mkdir out
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
    lt_compiler_flag="-o out/conftest2.$ac_objext"
    # Insert the option either (1) after the last *FLAGS variable, or
@@ -14172,11 +14334,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14175: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14337: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:14179: \$? = $ac_status" >&5
+   echo "$as_me:14341: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -14368,7 +14530,7 @@
       allow_undefined_flag_F77=unsupported
       always_export_symbols_F77=no
       enable_shared_with_static_runtimes_F77=yes
-      export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS] /s/.* \([^ ]*\)/\1 DATA/'\'' | $SED -e '\''/^[AITW] /s/.* //'\'' | sort | uniq > $export_symbols'
+      export_symbols_cmds_F77='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/'\'' -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
 
       if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
         archive_cmds_F77='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
@@ -14386,7 +14548,7 @@
       fi
       ;;
 
-    interix3*)
+    interix[3-9]*)
       hardcode_direct_F77=no
       hardcode_shlibpath_var_F77=no
       hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
@@ -14401,7 +14563,7 @@
       archive_expsym_cmds_F77='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
       ;;
 
-    linux*)
+    gnu* | linux* | k*bsd*-gnu)
       if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
 	tmp_addflag=
 	case $cc_basename,$host_cpu in
@@ -14419,13 +14581,22 @@
 	ifc* | ifort*)			# Intel Fortran compiler
 	  tmp_addflag=' -nofor_main' ;;
 	esac
-	archive_cmds_F77='$CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)			# Sun C 5.9
+	  whole_archive_flag_spec_F77='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_sharedflag='-G' ;;
+	*Sun\ F*)			# Sun Fortran 8.3
+	  tmp_sharedflag='-G' ;;
+	*)
+	  tmp_sharedflag='-shared' ;;
+	esac
+	archive_cmds_F77='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
 
 	if test $supports_anon_versioning = yes; then
 	  archive_expsym_cmds_F77='$echo "{ global:" > $output_objdir/$libname.ver~
   cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
   $echo "local: *; };" >> $output_objdir/$libname.ver~
-	  $CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+	  $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
 	fi
       else
 	ld_shlibs_F77=no
@@ -14584,7 +14755,7 @@
   	   strings "$collect2name" | grep resolve_lib_name >/dev/null
 	  then
   	  # We have reworked collect2
-  	  hardcode_direct_F77=yes
+  	  :
 	  else
   	  # We have old collect2
   	  hardcode_direct_F77=unsupported
@@ -14648,11 +14819,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -14697,11 +14875,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -14755,7 +14940,7 @@
       # The linker will automatically build a .lib file if we build a DLL.
       old_archive_From_new_cmds_F77='true'
       # FIXME: Should let the user specify the lib program.
-      old_archive_cmds_F77='lib /OUT:$oldlib$oldobjs$old_deplibs'
+      old_archive_cmds_F77='lib -OUT:$oldlib$oldobjs$old_deplibs'
       fix_srcfile_path_F77='`cygpath -w "$srcfile"`'
       enable_shared_with_static_runtimes_F77=yes
       ;;
@@ -14797,10 +14982,10 @@
       case $cc_basename in
         xlc*)
          output_verbose_link_cmd='echo'
-         archive_cmds_F77='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+         archive_cmds_F77='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring'
          module_cmds_F77='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
           # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
-         archive_expsym_cmds_F77='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+         archive_expsym_cmds_F77='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           module_expsym_cmds_F77='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag  -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           ;;
        *)
@@ -14840,7 +15025,7 @@
       ;;
 
     # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
-    freebsd* | kfreebsd*-gnu | dragonfly*)
+    freebsd* | dragonfly*)
       archive_cmds_F77='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
       hardcode_libdir_flag_spec_F77='-R$libdir'
       hardcode_direct_F77=yes
@@ -14962,24 +15147,28 @@
       ;;
 
     openbsd*)
-      hardcode_direct_F77=yes
-      hardcode_shlibpath_var_F77=no
-      if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-	archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
-	hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
-	export_dynamic_flag_spec_F77='${wl}-E'
+      if test -f /usr/libexec/ld.so; then
+	hardcode_direct_F77=yes
+	hardcode_shlibpath_var_F77=no
+	if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	  archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+	  hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
+	  export_dynamic_flag_spec_F77='${wl}-E'
+	else
+	  case $host_os in
+	   openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+	     archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+	     hardcode_libdir_flag_spec_F77='-R$libdir'
+	     ;;
+	   *)
+	     archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	     hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
+	     ;;
+	  esac
+        fi
       else
-       case $host_os in
-	 openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
-	   archive_cmds_F77='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-	   hardcode_libdir_flag_spec_F77='-R$libdir'
-	   ;;
-	 *)
-	   archive_cmds_F77='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	   hardcode_libdir_flag_spec_F77='${wl}-rpath,$libdir'
-	   ;;
-       esac
+	ld_shlibs_F77=no
       fi
       ;;
 
@@ -15038,17 +15227,16 @@
       case $host_os in
       solaris2.[0-5] | solaris2.[0-5].*) ;;
       *)
- 	# The compiler driver will combine linker options so we
- 	# cannot just pass the convience library names through
- 	# without $wl, iff we do not link with $LD.
- 	# Luckily, gcc supports the same syntax we need for Sun Studio.
+	# The compiler driver will combine and reorder linker options,
+	# but understands `-z linker_flag'.  GCC discards it without `$wl',
+	# but is careful enough not to reorder.
  	# Supported since Solaris 2.6 (maybe 2.5.1?)
- 	case $wlarc in
- 	'')
- 	  whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract' ;;
- 	*)
- 	  whole_archive_flag_spec_F77='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' ;;
- 	esac ;;
+	if test "$GCC" = yes; then
+	  whole_archive_flag_spec_F77='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	else
+	  whole_archive_flag_spec_F77='-z allextract$convenience -z defaultextract'
+	fi
+	;;
       esac
       link_all_deplibs_F77=yes
       ;;
@@ -15105,7 +15293,7 @@
       fi
       ;;
 
-    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7*)
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
       no_undefined_flag_F77='${wl}-z,text'
       archive_cmds_need_lc_F77=no
       hardcode_shlibpath_var_F77=no
@@ -15182,7 +15370,7 @@
       { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5
 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; }
       $rm conftest*
-      printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+      echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
       if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
@@ -15240,20 +15428,7 @@
 version_type=none
 dynamic_linker="$host_os ld.so"
 sys_lib_dlsearch_path_spec="/lib /usr/lib"
-if test "$GCC" = yes; then
-  sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
-  if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
-    # if the path contains ";" then we assume it to be the separator
-    # otherwise default to the standard path separator (i.e. ":") - it is
-    # assumed that no part of a normal pathname contains ";" but that should
-    # okay in the real world where ";" in dirpaths is itself problematic.
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
-  else
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED  -e "s/$PATH_SEPARATOR/ /g"`
-  fi
-else
-  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi
+
 need_lib_prefix=unknown
 hardcode_into_libs=no
 
@@ -15410,12 +15585,7 @@
   shlibpath_overrides_runpath=yes
   shlibpath_var=DYLD_LIBRARY_PATH
   shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-  # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same.
-  if test "$GCC" = yes; then
-    sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
-  else
-    sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
-  fi
+
   sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
   ;;
 
@@ -15432,18 +15602,6 @@
   dynamic_linker=no
   ;;
 
-kfreebsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 freebsd* | dragonfly*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
@@ -15481,7 +15639,7 @@
     shlibpath_overrides_runpath=no
     hardcode_into_libs=yes
     ;;
-  freebsd*) # from 4.6 on
+  *) # from 4.6 on, and DragonFly
     shlibpath_overrides_runpath=yes
     hardcode_into_libs=yes
     ;;
@@ -15544,7 +15702,7 @@
   postinstall_cmds='chmod 555 $lib'
   ;;
 
-interix3*)
+interix[3-9]*)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -15599,7 +15757,7 @@
   ;;
 
 # This must be Linux ELF.
-linux*)
+linux* | k*bsd*-gnu)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -15615,7 +15773,7 @@
 
   # Append ld.so.conf contents to the search path
   if test -f /etc/ld.so.conf; then
-    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ 	]*hwcap[ 	]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
     sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
   fi
 
@@ -15628,18 +15786,6 @@
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
-knetbsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -15721,6 +15867,10 @@
   sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
   ;;
 
+rdos*)
+  dynamic_linker=no
+  ;;
+
 solaris*)
   version_type=linux
   need_lib_prefix=no
@@ -15915,6 +16065,7 @@
     module_cmds_F77 \
     module_expsym_cmds_F77 \
     lt_cv_prog_compiler_c_o_F77 \
+    fix_srcfile_path_F77 \
     exclude_expsyms_F77 \
     include_expsyms_F77; do
 
@@ -16235,7 +16386,7 @@
 sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
 
 # Fix the shell variable \$srcfile for the compiler.
-fix_srcfile_path="$fix_srcfile_path_F77"
+fix_srcfile_path=$lt_fix_srcfile_path
 
 # Set to yes if exported symbols are required.
 always_export_symbols=$always_export_symbols_F77
@@ -16293,10 +16444,10 @@
 objext_GCJ=$objext
 
 # Code to be used in simple compile tests
-lt_simple_compile_test_code="class foo {}\n"
+lt_simple_compile_test_code="class foo {}"
 
 # Code to be used in simple link tests
-lt_simple_link_test_code='public class conftest { public static void main(String[] argv) {}; }\n'
+lt_simple_link_test_code='public class conftest { public static void main(String[] argv) {}; }'
 
 # ltmain only uses $CC for tagged configurations so make sure $CC is set.
 
@@ -16312,13 +16463,13 @@
 
 # save warnings/boilerplate of simple test code
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
 eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_compiler_boilerplate=`cat conftest.err`
 $rm conftest*
 
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_link_test_code" >conftest.$ac_ext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
 eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_linker_boilerplate=`cat conftest.err`
 $rm conftest*
@@ -16359,7 +16510,7 @@
 else
   lt_cv_prog_compiler_rtti_exceptions=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="-fno-rtti -fno-exceptions"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -16370,11 +16521,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:16373: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:16524: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:16377: \$? = $ac_status" >&5
+   echo "$as_me:16528: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -16425,13 +16576,15 @@
       lt_prog_compiler_pic_GCJ='-m68020 -resident32 -malways-restore-a4'
       ;;
 
-    beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
       # PIC is the default for these OSes.
       ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
       lt_prog_compiler_pic_GCJ='-DDLL_EXPORT'
       ;;
 
@@ -16441,7 +16594,7 @@
       lt_prog_compiler_pic_GCJ='-fno-common'
       ;;
 
-    interix3*)
+    interix[3-9]*)
       # Interix 3.x gcc -fpic/-fPIC options generate broken code.
       # Instead, we relocate shared libraries at runtime.
       ;;
@@ -16499,7 +16652,7 @@
        esac
        ;;
 
-    mingw* | pw32* | os2*)
+    mingw* | cygwin* | pw32* | os2*)
       # This hack is so that the source file can tell whether it is being
       # built for inclusion in a dll (and should export symbols for example).
       lt_prog_compiler_pic_GCJ='-DDLL_EXPORT'
@@ -16532,7 +16685,7 @@
       lt_prog_compiler_static_GCJ='-Bstatic'
       ;;
 
-    linux*)
+    linux* | k*bsd*-gnu)
       case $cc_basename in
       icc* | ecc*)
 	lt_prog_compiler_wl_GCJ='-Wl,'
@@ -16551,6 +16704,22 @@
         # All Alpha code is PIC.
         lt_prog_compiler_static_GCJ='-non_shared'
         ;;
+      *)
+        case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)
+	  # Sun C 5.9
+	  lt_prog_compiler_pic_GCJ='-KPIC'
+	  lt_prog_compiler_static_GCJ='-Bstatic'
+	  lt_prog_compiler_wl_GCJ='-Wl,'
+	  ;;
+	*Sun\ F*)
+	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
+	  lt_prog_compiler_pic_GCJ='-KPIC'
+	  lt_prog_compiler_static_GCJ='-Bstatic'
+	  lt_prog_compiler_wl_GCJ=''
+	  ;;
+	esac
+	;;
       esac
       ;;
 
@@ -16560,6 +16729,10 @@
       lt_prog_compiler_static_GCJ='-non_shared'
       ;;
 
+    rdos*)
+      lt_prog_compiler_static_GCJ='-non_shared'
+      ;;
+
     solaris*)
       lt_prog_compiler_pic_GCJ='-KPIC'
       lt_prog_compiler_static_GCJ='-Bstatic'
@@ -16627,7 +16800,7 @@
 else
   lt_prog_compiler_pic_works_GCJ=no
   ac_outfile=conftest.$ac_objext
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic_GCJ"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
@@ -16638,11 +16811,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:16641: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:16814: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
-   echo "$as_me:16645: \$? = $ac_status" >&5
+   echo "$as_me:16818: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
@@ -16691,7 +16864,7 @@
   lt_prog_compiler_static_works_GCJ=no
    save_LDFLAGS="$LDFLAGS"
    LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
-   printf "$lt_simple_link_test_code" > conftest.$ac_ext
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
    if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
      # The linker can only warn and ignore the option if not recognized
      # So say no if there are warnings
@@ -16731,7 +16904,7 @@
    mkdir conftest
    cd conftest
    mkdir out
-   printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
    lt_compiler_flag="-o out/conftest2.$ac_objext"
    # Insert the option either (1) after the last *FLAGS variable, or
@@ -16742,11 +16915,11 @@
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:16745: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:16918: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>out/conftest.err)
    ac_status=$?
    cat out/conftest.err >&5
-   echo "$as_me:16749: \$? = $ac_status" >&5
+   echo "$as_me:16922: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s out/conftest2.$ac_objext
    then
      # The compiler can only warn and ignore the option if not recognized
@@ -16938,7 +17111,7 @@
       allow_undefined_flag_GCJ=unsupported
       always_export_symbols_GCJ=no
       enable_shared_with_static_runtimes_GCJ=yes
-      export_symbols_cmds_GCJ='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS] /s/.* \([^ ]*\)/\1 DATA/'\'' | $SED -e '\''/^[AITW] /s/.* //'\'' | sort | uniq > $export_symbols'
+      export_symbols_cmds_GCJ='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/'\'' -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
 
       if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
         archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
@@ -16956,7 +17129,7 @@
       fi
       ;;
 
-    interix3*)
+    interix[3-9]*)
       hardcode_direct_GCJ=no
       hardcode_shlibpath_var_GCJ=no
       hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir'
@@ -16971,7 +17144,7 @@
       archive_expsym_cmds_GCJ='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
       ;;
 
-    linux*)
+    gnu* | linux* | k*bsd*-gnu)
       if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
 	tmp_addflag=
 	case $cc_basename,$host_cpu in
@@ -16989,13 +17162,22 @@
 	ifc* | ifort*)			# Intel Fortran compiler
 	  tmp_addflag=' -nofor_main' ;;
 	esac
-	archive_cmds_GCJ='$CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+	case `$CC -V 2>&1 | sed 5q` in
+	*Sun\ C*)			# Sun C 5.9
+	  whole_archive_flag_spec_GCJ='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+	  tmp_sharedflag='-G' ;;
+	*Sun\ F*)			# Sun Fortran 8.3
+	  tmp_sharedflag='-G' ;;
+	*)
+	  tmp_sharedflag='-shared' ;;
+	esac
+	archive_cmds_GCJ='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
 
 	if test $supports_anon_versioning = yes; then
 	  archive_expsym_cmds_GCJ='$echo "{ global:" > $output_objdir/$libname.ver~
   cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
   $echo "local: *; };" >> $output_objdir/$libname.ver~
-	  $CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+	  $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
 	fi
       else
 	ld_shlibs_GCJ=no
@@ -17154,7 +17336,7 @@
   	   strings "$collect2name" | grep resolve_lib_name >/dev/null
 	  then
   	  # We have reworked collect2
-  	  hardcode_direct_GCJ=yes
+  	  :
 	  else
   	  # We have old collect2
   	  hardcode_direct_GCJ=unsupported
@@ -17228,11 +17410,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -17287,11 +17476,18 @@
        } && test -s conftest$ac_exeext &&
        $as_test_x conftest$ac_exeext; then
 
-aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`
+lt_aix_libpath_sed='
+    /Import File Strings/,/^$/ {
+	/^0/ {
+	    s/^0  *\(.*\)$/\1/
+	    p
+	}
+    }'
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
 # Check for a 64-bit object if we didn't find anything.
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0  *\(.*\)$/\1/; p; }
-}'`; fi
+if test -z "$aix_libpath"; then
+  aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
+fi
 else
   echo "$as_me: failed program was:" >&5
 sed 's/^/| /' conftest.$ac_ext >&5
@@ -17345,7 +17541,7 @@
       # The linker will automatically build a .lib file if we build a DLL.
       old_archive_From_new_cmds_GCJ='true'
       # FIXME: Should let the user specify the lib program.
-      old_archive_cmds_GCJ='lib /OUT:$oldlib$oldobjs$old_deplibs'
+      old_archive_cmds_GCJ='lib -OUT:$oldlib$oldobjs$old_deplibs'
       fix_srcfile_path_GCJ='`cygpath -w "$srcfile"`'
       enable_shared_with_static_runtimes_GCJ=yes
       ;;
@@ -17387,10 +17583,10 @@
       case $cc_basename in
         xlc*)
          output_verbose_link_cmd='echo'
-         archive_cmds_GCJ='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+         archive_cmds_GCJ='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring'
          module_cmds_GCJ='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
           # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
-         archive_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+         archive_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           module_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[    ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag  -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
           ;;
        *)
@@ -17430,7 +17626,7 @@
       ;;
 
     # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
-    freebsd* | kfreebsd*-gnu | dragonfly*)
+    freebsd* | dragonfly*)
       archive_cmds_GCJ='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
       hardcode_libdir_flag_spec_GCJ='-R$libdir'
       hardcode_direct_GCJ=yes
@@ -17552,24 +17748,28 @@
       ;;
 
     openbsd*)
-      hardcode_direct_GCJ=yes
-      hardcode_shlibpath_var_GCJ=no
-      if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
-	archive_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	archive_expsym_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
-	hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir'
-	export_dynamic_flag_spec_GCJ='${wl}-E'
+      if test -f /usr/libexec/ld.so; then
+	hardcode_direct_GCJ=yes
+	hardcode_shlibpath_var_GCJ=no
+	if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+	  archive_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	  archive_expsym_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+	  hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir'
+	  export_dynamic_flag_spec_GCJ='${wl}-E'
+	else
+	  case $host_os in
+	   openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+	     archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+	     hardcode_libdir_flag_spec_GCJ='-R$libdir'
+	     ;;
+	   *)
+	     archive_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+	     hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir'
+	     ;;
+	  esac
+        fi
       else
-       case $host_os in
-	 openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
-	   archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
-	   hardcode_libdir_flag_spec_GCJ='-R$libdir'
-	   ;;
-	 *)
-	   archive_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
-	   hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir'
-	   ;;
-       esac
+	ld_shlibs_GCJ=no
       fi
       ;;
 
@@ -17628,17 +17828,16 @@
       case $host_os in
       solaris2.[0-5] | solaris2.[0-5].*) ;;
       *)
- 	# The compiler driver will combine linker options so we
- 	# cannot just pass the convience library names through
- 	# without $wl, iff we do not link with $LD.
- 	# Luckily, gcc supports the same syntax we need for Sun Studio.
+	# The compiler driver will combine and reorder linker options,
+	# but understands `-z linker_flag'.  GCC discards it without `$wl',
+	# but is careful enough not to reorder.
  	# Supported since Solaris 2.6 (maybe 2.5.1?)
- 	case $wlarc in
- 	'')
- 	  whole_archive_flag_spec_GCJ='-z allextract$convenience -z defaultextract' ;;
- 	*)
- 	  whole_archive_flag_spec_GCJ='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' ;;
- 	esac ;;
+	if test "$GCC" = yes; then
+	  whole_archive_flag_spec_GCJ='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
+	else
+	  whole_archive_flag_spec_GCJ='-z allextract$convenience -z defaultextract'
+	fi
+	;;
       esac
       link_all_deplibs_GCJ=yes
       ;;
@@ -17695,7 +17894,7 @@
       fi
       ;;
 
-    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7*)
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
       no_undefined_flag_GCJ='${wl}-z,text'
       archive_cmds_need_lc_GCJ=no
       hardcode_shlibpath_var_GCJ=no
@@ -17772,7 +17971,7 @@
       { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5
 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; }
       $rm conftest*
-      printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+      echo "$lt_simple_compile_test_code" > conftest.$ac_ext
 
       if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
@@ -17830,20 +18029,7 @@
 version_type=none
 dynamic_linker="$host_os ld.so"
 sys_lib_dlsearch_path_spec="/lib /usr/lib"
-if test "$GCC" = yes; then
-  sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
-  if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
-    # if the path contains ";" then we assume it to be the separator
-    # otherwise default to the standard path separator (i.e. ":") - it is
-    # assumed that no part of a normal pathname contains ";" but that should
-    # okay in the real world where ";" in dirpaths is itself problematic.
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
-  else
-    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED  -e "s/$PATH_SEPARATOR/ /g"`
-  fi
-else
-  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi
+
 need_lib_prefix=unknown
 hardcode_into_libs=no
 
@@ -18000,12 +18186,7 @@
   shlibpath_overrides_runpath=yes
   shlibpath_var=DYLD_LIBRARY_PATH
   shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-  # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same.
-  if test "$GCC" = yes; then
-    sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
-  else
-    sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
-  fi
+
   sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
   ;;
 
@@ -18022,18 +18203,6 @@
   dynamic_linker=no
   ;;
 
-kfreebsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 freebsd* | dragonfly*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
@@ -18071,7 +18240,7 @@
     shlibpath_overrides_runpath=no
     hardcode_into_libs=yes
     ;;
-  freebsd*) # from 4.6 on
+  *) # from 4.6 on, and DragonFly
     shlibpath_overrides_runpath=yes
     hardcode_into_libs=yes
     ;;
@@ -18134,7 +18303,7 @@
   postinstall_cmds='chmod 555 $lib'
   ;;
 
-interix3*)
+interix[3-9]*)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -18189,7 +18358,7 @@
   ;;
 
 # This must be Linux ELF.
-linux*)
+linux* | k*bsd*-gnu)
   version_type=linux
   need_lib_prefix=no
   need_version=no
@@ -18205,7 +18374,7 @@
 
   # Append ld.so.conf contents to the search path
   if test -f /etc/ld.so.conf; then
-    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ 	]*hwcap[ 	]/d;s/[:,	]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
     sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
   fi
 
@@ -18218,18 +18387,6 @@
   dynamic_linker='GNU/Linux ld.so'
   ;;
 
-knetbsd*-gnu)
-  version_type=linux
-  need_lib_prefix=no
-  need_version=no
-  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
-  soname_spec='${libname}${release}${shared_ext}$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  shlibpath_overrides_runpath=no
-  hardcode_into_libs=yes
-  dynamic_linker='GNU ld.so'
-  ;;
-
 netbsd*)
   version_type=sunos
   need_lib_prefix=no
@@ -18311,6 +18468,10 @@
   sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
   ;;
 
+rdos*)
+  dynamic_linker=no
+  ;;
+
 solaris*)
   version_type=linux
   need_lib_prefix=no
@@ -18505,6 +18666,7 @@
     module_cmds_GCJ \
     module_expsym_cmds_GCJ \
     lt_cv_prog_compiler_c_o_GCJ \
+    fix_srcfile_path_GCJ \
     exclude_expsyms_GCJ \
     include_expsyms_GCJ; do
 
@@ -18825,7 +18987,7 @@
 sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
 
 # Fix the shell variable \$srcfile for the compiler.
-fix_srcfile_path="$fix_srcfile_path_GCJ"
+fix_srcfile_path=$lt_fix_srcfile_path
 
 # Set to yes if exported symbols are required.
 always_export_symbols=$always_export_symbols_GCJ
@@ -18882,7 +19044,7 @@
 objext_RC=$objext
 
 # Code to be used in simple compile tests
-lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }\n'
+lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
 
 # Code to be used in simple link tests
 lt_simple_link_test_code="$lt_simple_compile_test_code"
@@ -18901,13 +19063,13 @@
 
 # save warnings/boilerplate of simple test code
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
 eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_compiler_boilerplate=`cat conftest.err`
 $rm conftest*
 
 ac_outfile=conftest.$ac_objext
-printf "$lt_simple_link_test_code" >conftest.$ac_ext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
 eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
 _lt_linker_boilerplate=`cat conftest.err`
 $rm conftest*
@@ -18985,6 +19147,7 @@
     module_cmds_RC \
     module_expsym_cmds_RC \
     lt_cv_prog_compiler_c_o_RC \
+    fix_srcfile_path_RC \
     exclude_expsyms_RC \
     include_expsyms_RC; do
 
@@ -19305,7 +19468,7 @@
 sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
 
 # Fix the shell variable \$srcfile for the compiler.
-fix_srcfile_path="$fix_srcfile_path_RC"
+fix_srcfile_path=$lt_fix_srcfile_path
 
 # Set to yes if exported symbols are required.
 always_export_symbols=$always_export_symbols_RC
@@ -20558,7 +20721,7 @@
 cat confdefs.h >>conftest.$ac_ext
 cat >>conftest.$ac_ext <<_ACEOF
 /* end confdefs.h.  */
-#include "pnggccrd.c"
+#include "$srcdir/pnggccrd.c"
 int
 main ()
 {
@@ -21119,7 +21282,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by libpng $as_me 1.0.26, which was
+This file was extended by libpng $as_me 1.0.27rc1, which was
 generated by GNU Autoconf 2.61.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21172,7 +21335,7 @@
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-libpng config.status 1.0.26
+libpng config.status 1.0.27rc1
 configured by $0, generated by GNU Autoconf 2.61,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
@@ -21429,6 +21592,7 @@
 CCDEPMODE!$CCDEPMODE$ac_delim
 am__fastdepCC_TRUE!$am__fastdepCC_TRUE$ac_delim
 am__fastdepCC_FALSE!$am__fastdepCC_FALSE$ac_delim
+SED!$SED$ac_delim
 build!$build$ac_delim
 build_cpu!$build_cpu$ac_delim
 build_vendor!$build_vendor$ac_delim
@@ -21440,7 +21604,6 @@
 GREP!$GREP$ac_delim
 EGREP!$EGREP$ac_delim
 CPP!$CPP$ac_delim
-SED!$SED$ac_delim
 LN_S!$LN_S$ac_delim
 ECHO!$ECHO$ac_delim
 AR!$AR$ac_delim
diff --git a/configure.ac b/configure.ac
index 3fde35a..6f7d072 100644
--- a/configure.ac
+++ b/configure.ac
@@ -18,15 +18,15 @@
 
 dnl Version number stuff here:
 
-AC_INIT([libpng], [1.0.26], [png-mng-implement@lists.sourceforge.net])
+AC_INIT([libpng], [1.0.27rc1], [png-mng-implement@lists.sourceforge.net])
 AM_INIT_AUTOMAKE
 dnl stop configure from automagically running automake
 AM_MAINTAINER_MODE
 
-PNGLIB_VERSION=1.0.26
+PNGLIB_VERSION=1.0.27rc1
 PNGLIB_MAJOR=1
 PNGLIB_MINOR=0
-PNGLIB_RELEASE=26
+PNGLIB_RELEASE=27
 
 dnl End of version number stuff
 
@@ -62,7 +62,7 @@
 AC_MSG_CHECKING(
   [if assembler code in pnggccrd.c can be compiled without PNG_NO_MMX_CODE])
 AC_TRY_COMPILE(
-  [#include "pnggccrd.c"],
+  [#include "$srcdir/pnggccrd.c"],
   [return 0;],
   AC_MSG_RESULT(yes)
   LIBPNG_NO_MMX="",
diff --git a/contrib/gregbook/COPYING b/contrib/gregbook/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/contrib/gregbook/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/contrib/gregbook/LICENSE b/contrib/gregbook/LICENSE
index 175ebfd..a39264e 100644
--- a/contrib/gregbook/LICENSE
+++ b/contrib/gregbook/LICENSE
@@ -1,12 +1,19 @@
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -23,4 +30,21 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------
diff --git a/contrib/gregbook/Makefile.mingw32 b/contrib/gregbook/Makefile.mingw32
new file mode 100644
index 0000000..e70a59a
--- /dev/null
+++ b/contrib/gregbook/Makefile.mingw32
@@ -0,0 +1,130 @@
+# Sample makefile for rpng-win / rpng2-win / wpng using mingw32-gcc and make.
+# Greg Roelofs
+# Last modified:  2 June 2007
+#
+#	The programs built by this makefile are described in the book,
+#	"PNG:  The Definitive Guide," by Greg Roelofs (O'Reilly and
+#	Associates, 1999).  Go buy a copy, eh?  Well, OK, it's not
+#	generally for sale anymore, but it's the thought that counts,
+#	right?  (Hint:  http://www.libpng.org/pub/png/book/ )
+#
+# Invoke this makefile from a DOS-prompt window via:
+#
+#	make -f Makefile.mingw32
+#
+# This makefile assumes libpng and zlib have already been built or downloaded
+# and are in subdirectories at the same level as the current subdirectory
+# (as indicated by the PNGDIR and ZDIR macros below).  It makes no assumptions
+# at all about the mingw32 installation tree (W32DIR).  Edit as appropriate.
+#
+# Note that the names of the dynamic and static libpng and zlib libraries
+# used below may change in later releases of the libraries.  This makefile
+# builds both statically and dynamically linked executables by default.
+# (You need only one set, but for testing it can be handy to have both.)
+
+
+# macros --------------------------------------------------------------------
+
+#PNGDIR = ../..#		for libpng-x.y.z/contrib/gregbook builds
+PNGDIR = ../libpng-win32
+PNGINC = -I$(PNGDIR)
+PNGLIBd = $(PNGDIR)/libpng.dll.a	# dynamically linked
+PNGLIBs = $(PNGDIR)/libpng.a		# statically linked, local libpng
+
+#ZDIR = ../../../zlib-win32#	for libpng-x.y.z/contrib/gregbook builds
+ZDIR = ../zlib-win32
+ZINC = -I$(ZDIR)
+ZLIBd = $(ZDIR)/libzdll.a
+ZLIBs = $(ZDIR)/libz.a
+
+# set this to the path where mingw32 installs its stuff (if left empty, W32INC/W32LIB resolve to /include and /lib):
+W32DIR =
+#W32DIR = /usr/local/cross-tools/i386-mingw32msvc
+W32INC = -I$(W32DIR)/include
+W32LIB = $(W32DIR)/lib/libuser32.a $(W32DIR)/lib/libgdi32.a
+
+CC = gcc
+#CC = i386-mingw32msvc-gcc #	e.g., Linux -> Win32 cross-compilation
+LD = $(CC)
+RM = rm -f
+CFLAGS = -O -Wall $(INCS) $(MINGW_CCFLAGS)
+# [note that -Wall is a gcc-specific compilation flag ("most warnings on")]
+# [-ansi, -pedantic and -W can also be used]
+LDFLAGS = $(MINGW_LDFLAGS)
+O = .o
+E = .exe
+
+INCS = $(PNGINC) $(ZINC) $(W32INC)
+RLIBSd = $(PNGLIBd) $(ZLIBd) $(W32LIB) -lm
+RLIBSs = $(PNGLIBs) $(ZLIBs) $(W32LIB) -lm
+WLIBSd = $(PNGLIBd) $(ZLIBd)
+WLIBSs = $(PNGLIBs) $(ZLIBs)
+
+RPNG   = rpng-win
+RPNG2  = rpng2-win
+WPNG   = wpng
+
+ROBJSd  = $(RPNG)$(O) readpng.pic$(O)
+ROBJS2d = $(RPNG2)$(O) readpng2.pic$(O)
+WOBJSd  = $(WPNG)$(O) writepng.pic$(O)
+
+RPNGs  = $(RPNG)-static
+RPNG2s = $(RPNG2)-static
+WPNGs  = $(WPNG)-static
+
+ROBJSs  = $(RPNG)$(O) readpng$(O)
+ROBJS2s = $(RPNG2)$(O) readpng2$(O)
+WOBJSs  = $(WPNG)$(O) writepng$(O)
+
+STATIC_EXES  = $(RPNGs)$(E) $(RPNG2s)$(E) $(WPNGs)$(E)
+DYNAMIC_EXES = $(RPNG)$(E) $(RPNG2)$(E) $(WPNG)$(E)
+
+EXES = $(STATIC_EXES) $(DYNAMIC_EXES)
+
+
+# implicit make rules -------------------------------------------------------
+
+.c$(O):
+	$(CC) -c $(CFLAGS) $<
+# above: plain objects; below: *.pic$(O) objects, the same sources compiled with -DPNG_BUILD_DLL (see ROBJSd/ROBJS2d/WOBJSd)
+%.pic$(O): %.c
+	$(CC) -c $(CFLAGS) -DPNG_BUILD_DLL -o $@ $<
+
+
+# dependencies --------------------------------------------------------------
+
+all: $(EXES)
+# rpng-win, linked against the static libpng/zlib archives
+$(RPNGs)$(E): $(ROBJSs)
+	$(LD) $(LDFLAGS) -o $@ $^ $(RLIBSs)
+# rpng-win, linked against the libpng/zlib import libraries
+$(RPNG)$(E): $(ROBJSd)
+	$(LD) $(LDFLAGS) -o $@ $^ $(RLIBSd)
+# rpng2-win, linked against the static libpng/zlib archives
+$(RPNG2s)$(E): $(ROBJS2s)
+	$(LD) $(LDFLAGS) -o $@ $^ $(RLIBSs)
+# rpng2-win, linked against the libpng/zlib import libraries
+$(RPNG2)$(E): $(ROBJS2d)
+	$(LD) $(LDFLAGS) -o $@ $^ $(RLIBSd)
+# wpng, linked against the static libpng/zlib archives
+$(WPNGs)$(E): $(WOBJSs)
+	$(LD) $(LDFLAGS) -o $@ $^ $(WLIBSs)
+# wpng, linked against the libpng/zlib import libraries
+$(WPNG)$(E): $(WOBJSd)
+	$(LD) $(LDFLAGS) -o $@ $^ $(WLIBSd)
+
+$(RPNG)$(O):	$(RPNG).c readpng.h
+$(RPNG2)$(O):	$(RPNG2).c readpng2.h
+$(WPNG)$(O):	$(WPNG).c writepng.h
+
+readpng$(O) readpng.pic$(O):	readpng.c readpng.h
+readpng2$(O) readpng2.pic$(O):	readpng2.c readpng2.h
+writepng$(O) writepng.pic$(O):	writepng.c writepng.h
+
+
+# maintenance ---------------------------------------------------------------
+.PHONY: all clean	# command names, not files: run even if a file called "all" or "clean" exists
+clean:			# remove every executable and object file this makefile builds
+	$(RM) $(EXES)
+	$(RM) $(ROBJSs) $(ROBJS2s) $(WOBJSs)
+	$(RM) $(ROBJSd) $(ROBJS2d) $(WOBJSd)
diff --git a/contrib/gregbook/Makefile.unx b/contrib/gregbook/Makefile.unx
index c0c3fb1..7ff65bf 100644
--- a/contrib/gregbook/Makefile.unx
+++ b/contrib/gregbook/Makefile.unx
@@ -1,73 +1,92 @@
 # Sample makefile for rpng-x / rpng2-x / wpng using gcc and make.
 # Greg Roelofs
-# Last modified:  7 March 2002
+# Last modified:  2 June 2007
 #
 #	The programs built by this makefile are described in the book,
 #	"PNG:  The Definitive Guide," by Greg Roelofs (O'Reilly and
-#	Associates, 1999).  Go buy a copy, eh?  Buy some for friends
-#	and family, too.  (Not that this is a blatant plug or anything.)
+#	Associates, 1999).  Go buy a copy, eh?  Well, OK, it's not
+#	generally for sale anymore, but it's the thought that counts,
+#	right?  (Hint:  http://www.libpng.org/pub/png/book/ )
 #
 # Invoke this makefile from a shell prompt in the usual way; for example:
 #
 #	make -f Makefile.unx
 #
 # This makefile assumes libpng and zlib have already been built or downloaded
-# and are both installed in /usr/local/{include,lib} (as indicated by the
-# PNG* and Z* macros below).  Edit as appropriate--choose only ONE each of
-# the PNGINC, PNGLIB, ZINC and ZLIB lines.
+# and are installed in /usr/local/{include,lib} or as otherwise indicated by
+# the PNG* and Z* macros below.  Edit as appropriate--choose only ONE each of
+# the PNGINC, PNGLIBd, PNGLIBs, ZINC, ZLIBd and ZLIBs lines.
 #
-# This makefile builds statically linked executables (against libpng and zlib,
-# that is), but that can be changed by uncommenting the appropriate PNGLIB and
-# ZLIB lines.
+# This makefile builds both dynamically and statically linked executables
+# (against libpng and zlib, that is), but that can be changed by modifying
+# the "EXES =" line.  (You need only one set, but for testing it can be handy
+# to have both.)
 
 
 # macros --------------------------------------------------------------------
 
-PNGINC = -I/usr/local/include/libpng12
-#PNGLIB = -L/usr/local/lib -lpng12 # dynamically linked against libpng
-PNGLIB = /usr/local/lib/libpng12.a # statically linked against libpng
+#PNGDIR = /usr/local/lib
+#PNGINC = -I/usr/local/include/libpng12
+#PNGLIBd = -L$(PNGDIR) -lpng12 # dynamically linked, installed libpng
+#PNGLIBs = $(PNGDIR)/libpng12.a # statically linked, installed libpng
 # or:
-#PNGINC = -I../libpng
-#PNGLIB = -L../libpng -lpng
-#PNGLIB = ../libpng/libpng.a
+PNGDIR = ../..#	this one is for libpng-x.y.z/contrib/gregbook builds
+#PNGDIR = ../libpng
+PNGINC = -I$(PNGDIR)
+PNGLIBd = -Wl,-rpath,$(PNGDIR) -L$(PNGDIR) -lpng12	# dynamically linked
+PNGLIBs = $(PNGDIR)/libpng.a		# statically linked, local libpng
 
+ZDIR = /usr/local/lib
+#ZDIR = /usr/lib64
 ZINC = -I/usr/local/include
-#ZLIB = -L/usr/local/lib -lz		# dynamically linked against zlib
-ZLIB = /usr/local/lib/libz.a		# statically linked against zlib
-#ZINC = -I../zlib
-#ZLIB = -L../zlib -lz
-#ZLIB = ../zlib/libz.a
+ZLIBd = -L$(ZDIR) -lz			# dynamically linked against zlib
+ZLIBs = $(ZDIR)/libz.a			# statically linked against zlib
+# or:
+#ZDIR = ../zlib
+#ZINC = -I$(ZDIR)
+#ZLIBd = -Wl,-rpath,$(ZDIR) -L$(ZDIR) -lz  # -rpath allows in-place testing
+#ZLIBs = $(ZDIR)/libz.a
 
 #XINC = -I/usr/include			# old-style, stock X distributions
-#XLIB = -L/usr/lib/X11 -lX11
+#XLIB = -L/usr/lib/X11 -lX11		#  (including SGI IRIX)
 #XINC = -I/usr/openwin/include		# Sun workstations (OpenWindows)
 #XLIB = -L/usr/openwin/lib -lX11
-XINC = -I/usr/X11R6/include		# new X distributions (XFree86, etc.)
+XINC = -I/usr/X11R6/include		# new X distributions (X.org, etc.)
 XLIB = -L/usr/X11R6/lib -lX11
+#XLIB = -L/usr/X11R6/lib64 -lX11	# e.g., Red Hat on AMD64
 
 INCS = $(PNGINC) $(ZINC) $(XINC)
-RLIBS = $(PNGLIB) $(ZLIB) $(XLIB) -lm
-WLIBS = $(PNGLIB) $(ZLIB)
+RLIBSd = $(PNGLIBd) $(ZLIBd) $(XLIB) -lm
+RLIBSs = $(PNGLIBs) $(ZLIBs) $(XLIB) -lm
+WLIBSd = $(PNGLIBd) $(ZLIBd) -lm
+WLIBSs = $(PNGLIBs) $(ZLIBs)
 
 CC = gcc
 LD = gcc
 RM = rm -f
-CFLAGS = -O -Wall $(INCS)
+CFLAGS = -O -Wall $(INCS) -DFEATURE_LOOP
 # [note that -Wall is a gcc-specific compilation flag ("most warnings on")]
 # [-ansi, -pedantic and -W can also be used]
 LDFLAGS =
 O = .o
 E =
 
-RPNG  = rpng-x
-RPNG2 = rpng2-x
-WPNG  = wpng
+RPNG   = rpng-x
+RPNG2  = rpng2-x
+WPNG   = wpng
+
+RPNGs  = $(RPNG)-static
+RPNG2s = $(RPNG2)-static
+WPNGs  = $(WPNG)-static
 
 ROBJS  = $(RPNG)$(O) readpng$(O)
 ROBJS2 = $(RPNG2)$(O) readpng2$(O)
 WOBJS  = $(WPNG)$(O) writepng$(O)
 
-EXES = $(RPNG)$(E) $(RPNG2)$(E) $(WPNG)$(E)
+STATIC_EXES  = $(RPNGs)$(E) $(RPNG2s)$(E) $(WPNGs)$(E)
+DYNAMIC_EXES = $(RPNG)$(E) $(RPNG2)$(E) $(WPNG)$(E)
+
+EXES = $(STATIC_EXES) $(DYNAMIC_EXES)
 
 
 # implicit make rules -------------------------------------------------------
@@ -80,14 +99,23 @@
 
 all:  $(EXES)
 
+$(RPNGs)$(E): $(ROBJS)
+	$(LD) $(LDFLAGS) -o $@ $(ROBJS) $(RLIBSs)
+
 $(RPNG)$(E): $(ROBJS)
-	$(LD) $(LDFLAGS) -o $@ $(ROBJS) $(RLIBS)
+	$(LD) $(LDFLAGS) -o $@ $(ROBJS) $(RLIBSd)
+
+$(RPNG2s)$(E): $(ROBJS2)
+	$(LD) $(LDFLAGS) -o $@ $(ROBJS2) $(RLIBSs)
 
 $(RPNG2)$(E): $(ROBJS2)
-	$(LD) $(LDFLAGS) -o $@ $(ROBJS2) $(RLIBS)
+	$(LD) $(LDFLAGS) -o $@ $(ROBJS2) $(RLIBSd)
+
+$(WPNGs)$(E): $(WOBJS)
+	$(LD) $(LDFLAGS) -o $@ $(WOBJS) $(WLIBSs)
 
 $(WPNG)$(E): $(WOBJS)
-	$(LD) $(LDFLAGS) -o $@ $(WOBJS) $(WLIBS)
+	$(LD) $(LDFLAGS) -o $@ $(WOBJS) $(WLIBSd)
 
 $(RPNG)$(O):	$(RPNG).c readpng.h
 $(RPNG2)$(O):	$(RPNG2).c readpng2.h
diff --git a/contrib/gregbook/Makefile.w32 b/contrib/gregbook/Makefile.w32
index 62041cd..3c08085 100644
--- a/contrib/gregbook/Makefile.w32
+++ b/contrib/gregbook/Makefile.w32
@@ -1,11 +1,12 @@
 # Sample makefile for rpng-win / rpng2-win / wpng using MSVC and NMAKE.
 # Greg Roelofs
-# Last modified:  16 February 1999
+# Last modified:  2 June 2007
 #
 #	The programs built by this makefile are described in the book,
 #	"PNG:  The Definitive Guide," by Greg Roelofs (O'Reilly and
-#	Associates, 1999).  Go buy a copy, eh?  Buy some for friends
-#	and family, too.  (Not that this is a blatant plug or anything.)
+#	Associates, 1999).  Go buy a copy, eh?  Well, OK, it's not
+#	generally for sale anymore, but it's the thought that counts,
+#	right?  (Hint:  http://www.libpng.org/pub/png/book/ )
 #
 # Invoke this makefile from a DOS prompt window via:
 #
@@ -53,7 +54,7 @@
 LD = link
 RM = del
 CFLAGS = -nologo -O -W3 $(INCS) $(cvars)
-# [note that -Wall is an MSVC-specific compilation flag ("all warnings on")]
+# [note that -W3 is an MSVC-specific compilation flag ("warning level 3")]
 # [see %devstudio%\vc\include\win32.mak for cvars macro definition]
 O = .obj
 E = .exe
diff --git a/contrib/gregbook/README b/contrib/gregbook/README
index c67045c..5b08bc6 100644
--- a/contrib/gregbook/README
+++ b/contrib/gregbook/README
@@ -22,7 +22,8 @@
 Files:
 
    README             this file
-   LICENSE            terms of distribution and reuse (BSD-like)
+   LICENSE            terms of distribution and reuse (BSD-like or GNU GPL)
+   COPYING            GNU General Public License (GPL)
 
    Makefile.unx       Unix makefile
    Makefile.w32       Windows (MSVC) makefile
@@ -54,15 +55,15 @@
 13-15 of the book for detailed discussion of the three programs.
 
 Greg Roelofs
-newt@pobox.com
-30 June 2001
+http://pobox.com/~newt/greg_contact.html
+2 June 2007
 
 
 BUILD INSTRUCTIONS
 
  - Prerequisites (in order of compilation):
 
-      - zlib		http://www.gzip.org/zlib/
+      - zlib		http://zlib.net/
       - libpng		http://www.libpng.org/pub/png/libpng.html
       - pngbook		http://www.libpng.org/pub/png/book/sources.html
 
@@ -150,9 +151,9 @@
      To run the programs, they probably first need to be set up as "foreign
      symbols," with "disk" and "dir" set appropriately:
 
-     $ rpng == "$disk:[dir]rpng-x.exe"
+     $ rpng  == "$disk:[dir]rpng-x.exe"
      $ rpng2 == "$disk:[dir]rpng2-x.exe"
-     $ wpng == "$disk:[dir]wpng.exe"
+     $ wpng  == "$disk:[dir]wpng.exe"
 
      All three will print a basic usage screen when run without any command-
      line arguments; see the book for more details.  Note that the options
@@ -176,7 +177,7 @@
      wpng is a purely command-line image converter from binary PBMPLUS/NetPBM
      format (.pgm or .ppm) to PNG; for example,
 
-	wpng -time < toucan.ppm > toucan.png
+	wpng -time < toucan-notrans.ppm > toucan-notrans.png
 
      would convert the specified PPM file (using redirection) to PNG, auto-
      matically setting the PNG modification-time chunk.
diff --git a/contrib/gregbook/readpng.c b/contrib/gregbook/readpng.c
index b8e0904..d87f6c7 100644
--- a/contrib/gregbook/readpng.c
+++ b/contrib/gregbook/readpng.c
@@ -4,13 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2000 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -27,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #include <stdio.h>
diff --git a/contrib/gregbook/readpng.h b/contrib/gregbook/readpng.h
index 1c19aca..fad9fe3 100644
--- a/contrib/gregbook/readpng.h
+++ b/contrib/gregbook/readpng.h
@@ -4,13 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2000 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -27,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #ifndef TRUE
diff --git a/contrib/gregbook/readpng2.c b/contrib/gregbook/readpng2.c
index 9e66a0b..fd95667 100644
--- a/contrib/gregbook/readpng2.c
+++ b/contrib/gregbook/readpng2.c
@@ -4,19 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-   Changelog:
-    - 1.01:  initial public release
-    - 1.02:  added code to skip unused chunks (GR-P)
-
-  ---------------------------------------------------------------------------
-
-      Copyright (c) 1998-2002 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -33,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 
@@ -56,7 +74,7 @@
 void readpng2_version_info(void)
 {
 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && \
-    (defined(__i386__) || defined(_M_IX86)) && \
+    (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__)) && \
     defined(PNG_LIBPNG_VER) && (PNG_LIBPNG_VER >= 10200)
     /*
      * WARNING:  This preprocessor approach means that the following code
@@ -79,7 +97,12 @@
               "with MMX support\n   (%s version).", PNG_LIBPNG_VER_STRING,
               png_libpng_ver, compilerID == 1? "MSVC++" :
               (compilerID == 2? "GNU C" : "unknown"));
-            fprintf(stderr, "  Processor %s MMX instructions.\n",
+            fprintf(stderr, "  Processor (x86%s) %s MMX instructions.\n",
+#if defined(__x86_64__)
+              "_64",   /* architecture suffix for the first %s */
+#else
+              "",
+#endif
               mmxsupport? "supports" : "does not support");
             if (mmxsupport > 0) {
                 int num_optims = 0;
@@ -179,40 +202,38 @@
         return 2;
     }
 
-    /* prepare the reader to ignore all recognized chunks whose data isn't
-     * going to be used, i.e., all chunks recognized by libpng except for
-     * IHDR, PLTE, IDAT, IEND, tRNS, bKGD, gAMA, and sRGB : */
 
-#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
+    /* prepare the reader to ignore all recognized chunks whose data won't be
+     * used, i.e., all chunks recognized by libpng except for IHDR, PLTE, IDAT,
+     * IEND, tRNS, bKGD, gAMA, and sRGB (small performance improvement) */
     {
-#ifndef HANDLE_CHUNK_NEVER
-/* prior to libpng-1.2.5, this macro was internal, so we define it here. */
-# define HANDLE_CHUNK_NEVER 1
-#endif
-       /* these byte strings were copied from png.h.
-        * If a future libpng version recognizes more chunks, add them
-        * to this list.  If a future version of readpng2.c recognizes
-        * more chunks, delete them from this list. */
-       png_byte png_chunk_types_to_ignore[]=
-          { 99,  72,  82,  77, '\0', /* cHRM */
-           104,  73,  83,  84, '\0', /* hIST */
-           105,  67,  67,  80, '\0', /* iCCP */
-           105,  84,  88, 116, '\0', /* iTXt */
-           111,  70,  70, 115, '\0', /* oFFs */
-           112,  67,  65,  76, '\0', /* pCAL */
-           115,  67,  65,  76, '\0', /* sCAL */
-           112,  72,  89, 115, '\0', /* pHYs */
-           115,  66,  73,  84, '\0', /* sBIT */
-           115,  80,  76,  84, '\0', /* sPLT */
-           116,  69,  88, 116, '\0', /* tEXt */
-           116,  73,  77,  69, '\0', /* tIME */
-           122,  84,  88, 116, '\0'}; /* zTXt */
-#define NUM_PNG_CHUNK_TYPES_TO_IGNORE 13
+        /* These byte strings were copied from png.h.  If a future libpng
+         * version recognizes more chunks, add them to this list.  If a
+         * future version of readpng2.c recognizes more chunks, delete them
+         * from this list. */
+        static const png_byte chunks_to_ignore[] = {
+             99,  72,  82,  77, '\0',  /* cHRM */
+            104,  73,  83,  84, '\0',  /* hIST */
+            105,  67,  67,  80, '\0',  /* iCCP */
+            105,  84,  88, 116, '\0',  /* iTXt */
+            111,  70,  70, 115, '\0',  /* oFFs */
+            112,  67,  65,  76, '\0',  /* pCAL */
+            112,  72,  89, 115, '\0',  /* pHYs */
+            115,  66,  73,  84, '\0',  /* sBIT */
+            115,  67,  65,  76, '\0',  /* sCAL */
+            115,  80,  76,  84, '\0',  /* sPLT */
+            115,  84,  69,  82, '\0',  /* sTER */
+            116,  69,  88, 116, '\0',  /* tEXt */
+            116,  73,  77,  69, '\0',  /* tIME */
+            122,  84,  88, 116, '\0'   /* zTXt */
+        };
 
-    png_set_keep_unknown_chunks(png_ptr, HANDLE_CHUNK_NEVER,
-        png_chunk_types_to_ignore, NUM_PNG_CHUNK_TYPES_TO_IGNORE);
+        png_set_keep_unknown_chunks(png_ptr, 1 /* PNG_HANDLE_CHUNK_NEVER */,
+          chunks_to_ignore, sizeof(chunks_to_ignore)/5);
     }
-#endif
+#endif /* PNG_UNKNOWN_CHUNKS_SUPPORTED */
+
 
     /* instead of doing png_init_io() here, now we set up our callback
      * functions for progressive decoding */
@@ -237,7 +258,7 @@
      *                  png_set_asm_flags (png_ptr, flags);
      */
 
-#if (defined(__i386__) || defined(_M_IX86)) && \
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__)) && \
     defined(PNG_LIBPNG_VER) && (PNG_LIBPNG_VER >= 10200)
     /*
      * WARNING:  This preprocessor approach means that the following code
diff --git a/contrib/gregbook/readpng2.h b/contrib/gregbook/readpng2.h
index 7caa9d9..386e5ee 100644
--- a/contrib/gregbook/readpng2.h
+++ b/contrib/gregbook/readpng2.h
@@ -4,13 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -27,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #ifndef TRUE
@@ -66,7 +90,7 @@
     int rowbytes;
     int channels;
     int need_bgcolor;
-#if (defined(__i386__) || defined(_M_IX86))
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
     int nommxfilters;
     int nommxcombine;
     int nommxinterlace;
diff --git a/contrib/gregbook/readppm.c b/contrib/gregbook/readppm.c
new file mode 100644
index 0000000..be9a56d
--- /dev/null
+++ b/contrib/gregbook/readppm.c
@@ -0,0 +1,179 @@
+/*---------------------------------------------------------------------------
+
+   rpng - simple PNG display program                              readppm.c
+
+  ---------------------------------------------------------------------------
+
+   This is a special-purpose replacement for readpng.c that allows binary
+   PPM files to be used in place of PNG images.
+
+  ---------------------------------------------------------------------------
+
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
+
+      This software is provided "as is," without warranty of any kind,
+      express or implied.  In no event shall the author or contributors
+      be held liable for any damages arising in any way from the use of
+      this software.
+
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
+      Permission is granted to anyone to use this software for any purpose,
+      including commercial applications, and to alter it and redistribute
+      it freely, subject to the following restrictions:
+
+      1. Redistributions of source code must retain the above copyright
+         notice, disclaimer, and this list of conditions.
+      2. Redistributions in binary form must reproduce the above copyright
+         notice, disclaimer, and this list of conditions in the documenta-
+         tion and/or other materials provided with the distribution.
+      3. All advertising materials mentioning features or use of this
+         software must display the following acknowledgment:
+
+            This product includes software developed by Greg Roelofs
+            and contributors for the book, "PNG: The Definitive Guide,"
+            published by O'Reilly and Associates.
+
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  ---------------------------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "readpng.h"    /* typedefs, common macros, public prototypes */
+
+
+ulg  width, height;                    /* dimensions parsed by readpng_init() */
+int  bit_depth, color_type, channels;  /* set by readpng_init() */
+uch  *image_data = NULL;               /* pixel buffer; freed by readpng_cleanup() */
+FILE *saved_infile;                    /* stream stored by readpng_init() for later reads */
+
+/* prints a fixed banner to stderr; this reader has no library versions to show */
+void readpng_version_info()
+{
+    fputs("   Compiled without libpng, zlib or PBMPLUS/NetPBM.\n", stderr);
+}
+
+
+/* return value = 0 for success, 1 for bad sig, 2 for bad header (this reader's
+ * stand-in for "bad IHDR"); 4 (no mem) is never returned by this PPM version */
+int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight)
+{
+    static char ppmline[256];   /* char (not uch) to match fgets()/sscanf() */
+    int maxval = 0;             /* stays 0 if the maxval line cannot be parsed */
+
+    saved_infile = infile;
+
+    if (fgets(ppmline, sizeof(ppmline), infile) == NULL ||
+        ppmline[0] != 'P' || ppmline[1] != '6') {
+        fprintf(stderr, "ERROR:  not a PPM file\n");
+        return 1;
+    }
+    /* possible color types:  P5 = grayscale (0), P6 = RGB (2), P8 = RGBA (6);
+     * only P6 passes the signature check above, so RGB is the only case */
+    color_type = 2;
+    channels = 3;
+
+    /* dimensions:  first non-comment line; at EOF the buffer is emptied so
+     * the loop ends and the parse below fails instead of spinning forever */
+    do {
+        if (fgets(ppmline, sizeof(ppmline), infile) == NULL)
+            ppmline[0] = '\0';
+    } while (ppmline[0] == '#');
+    if (sscanf(ppmline, "%lu %lu", &width, &height) != 2 ||
+        width == 0 || height == 0) {
+        fprintf(stderr, "ERROR:  bad PPM dimensions\n");
+        return 2;
+    }
+
+    /* maxval:  next non-comment line; only 8-bit samples are supported */
+    do {
+        if (fgets(ppmline, sizeof(ppmline), infile) == NULL)
+            ppmline[0] = '\0';
+    } while (ppmline[0] == '#');
+    if (sscanf(ppmline, "%d", &maxval) != 1 || maxval != 255) {
+        fprintf(stderr, "ERROR:  maxval = %d\n", maxval);
+        return 2;
+    }
+    bit_depth = 8;
+
+    *pWidth = width;
+    *pHeight = height;
+
+    return 0;
+}
+
+
+
+
+/* the PPM reader has no background color to offer:  always returns 1, the
+ * interface's "no bKGD chunk" code, and never writes red, green or blue */
+
+int readpng_get_bgcolor(uch *red, uch *green, uch *blue)
+{
+    return 1;
+}
+
+
+
+
+/* display_exponent == LUT_exponent * CRT_exponent */
+
+uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
+{
+    ulg  rowbytes;
+
+    /* nothing is expanded or converted for PPM input:  the samples are
+     * returned exactly as stored, and display_exponent is unused */
+    /* GRR WARNING:  grayscale needs to be expanded and channels reset! */
+
+    *pRowbytes = rowbytes = channels*width;
+    *pChannels = channels;
+
+    /* refuse empty images and sizes whose byte count would wrap around */
+    if (rowbytes == 0 || height == 0 || rowbytes/channels != width ||
+        height > ((ulg)-1) / rowbytes)
+        return NULL;
+    if ((image_data = (uch *)malloc(rowbytes*height)) == NULL)
+        return NULL;
+
+    Trace((stderr, "readpng_get_image:  rowbytes = %lu, height = %lu\n", rowbytes, height));
+
+    /* read the whole image at once; a short read means a truncated file */
+    if (fread(image_data, 1L, rowbytes*height, saved_infile) != rowbytes*height) {
+        free(image_data);
+        image_data = NULL;
+    }
+
+    return image_data;
+}
+
+
+void readpng_cleanup(int free_image_data)
+{
+    if (!free_image_data || image_data == NULL)
+        return;     /* caller keeps the buffer, or there is none to free */
+
+    free(image_data);
+    image_data = NULL;
+}
diff --git a/contrib/gregbook/rpng-win.c b/contrib/gregbook/rpng-win.c
index b84a7fc..b37c00a 100644
--- a/contrib/gregbook/rpng-win.c
+++ b/contrib/gregbook/rpng-win.c
@@ -22,16 +22,24 @@
     - 1.02:  added extra set of parentheses to png_jmpbuf() macro; fixed
               command-line parsing bug
     - 1.10:  enabled "message window"/console (thanks to David Geldreich)
+    - 2.00:  dual-licensed (added GNU GPL)
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -48,11 +56,28 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #define PROGNAME  "rpng-win"
 #define LONGNAME  "Simple PNG Viewer for Windows"
-#define VERSION   "1.20 of 28 May 2001"
+#define VERSION   "2.00 of 2 June 2007"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -85,10 +110,9 @@
 LRESULT CALLBACK  rpng_win_wndproc(HWND, UINT, WPARAM, LPARAM);
 
 
-static char titlebar[1024], *window_name = titlebar;
+static char titlebar[1024];
 static char *progname = PROGNAME;
 static char *appname = LONGNAME;
-static char *icon_name = PROGNAME;     /* GRR:  not (yet) used */
 static char *filename;
 static FILE *infile;
 
@@ -325,9 +349,14 @@
      * check for one in the PNG file--if not, the initialized values of 0
      * (black) will be used */
 
-    if (have_bg)
-        sscanf(bgstr+1, "%2x%2x%2x", &bg_red, &bg_green, &bg_blue);
-    else if (readpng_get_bgcolor(&bg_red, &bg_green, &bg_blue) > 1) {
+    if (have_bg) {
+        unsigned r, g, b;   /* this approach quiets compiler warnings */
+
+        sscanf(bgstr+1, "%2x%2x%2x", &r, &g, &b);
+        bg_red   = (uch)r;
+        bg_green = (uch)g;
+        bg_blue  = (uch)b;
+    } else if (readpng_get_bgcolor(&bg_red, &bg_green, &bg_blue) > 1) {
         readpng_cleanup(TRUE);
         fprintf(stderr, PROGNAME
           ":  libpng error while checking for background color\n");
diff --git a/contrib/gregbook/rpng-x.c b/contrib/gregbook/rpng-x.c
index e787ef1..9477260 100644
--- a/contrib/gregbook/rpng-x.c
+++ b/contrib/gregbook/rpng-x.c
@@ -9,7 +9,7 @@
    by Martin Zinser under OpenVMS; may work under OS/2 with some tweaking).
 
    to do:
-    - 8-bit support
+    - 8-bit (colormapped) X support
     - use %.1023s to simplify truncation of title-bar string?
 
   ---------------------------------------------------------------------------
@@ -21,18 +21,27 @@
     - 1.10:  added support for non-default visuals; fixed X pixel-conversion
     - 1.11:  added extra set of parentheses to png_jmpbuf() macro; fixed
               command-line parsing bug
-    - 1.12:  fixed small X memory leak (thanks to Francois Petitjean)
-    - 1.13:  fixed XFreeGC() crash bug
+    - 1.12:  fixed some small X memory leaks (thanks to François Petitjean)
+    - 1.13:  fixed XFreeGC() crash bug (thanks to Patrick Welche)
+    - 1.14:  added support for X resources (thanks to Gerhard Niklasch)
+    - 2.00:  dual-licensed (added GNU GPL)
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -49,11 +58,30 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #define PROGNAME  "rpng-x"
 #define LONGNAME  "Simple PNG Viewer for X"
-#define VERSION   "1.13 of 16 August 2001"
+#define VERSION   "2.00 of 2 June 2007"
+#define RESNAME   "rpng"	/* our X resource application name */
+#define RESCLASS  "Rpng"	/* our X resource class name */
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -91,6 +119,8 @@
 static char titlebar[1024], *window_name = titlebar;
 static char *appname = LONGNAME;
 static char *icon_name = PROGNAME;
+static char *res_name = RESNAME;
+static char *res_class = RESCLASS;
 static char *filename;
 static FILE *infile;
 
@@ -399,11 +429,12 @@
     XEvent e;
     XGCValues gcvalues;
     XSetWindowAttributes attr;
-    XSizeHints *size_hints;
     XTextProperty windowName, *pWindowName = &windowName;
     XTextProperty iconName, *pIconName = &iconName;
     XVisualInfo visual_info;
+    XSizeHints *size_hints;
     XWMHints *wm_hints;
+    XClassHint *class_hints;
 
 
     screen = DefaultScreen(display);
@@ -526,7 +557,7 @@
     if (!XStringListToTextProperty(&icon_name, 1, pIconName))
         pIconName = NULL;
 
-    /* OK if either hints allocation fails; XSetWMProperties() allows NULLs */
+    /* OK if any hints allocation fails; XSetWMProperties() allows NULLs */
 
     if ((size_hints = XAllocSizeHints()) != NULL) {
         /* window will not be resizable */
@@ -542,8 +573,13 @@
         wm_hints->flags = StateHint | InputHint  /* | IconPixmapHint */ ;
     }
 
+    if ((class_hints = XAllocClassHint()) != NULL) {
+        class_hints->res_name = res_name;
+        class_hints->res_class = res_class;
+    }
+
     XSetWMProperties(display, window, pWindowName, pIconName, NULL, 0,
-      size_hints, wm_hints, NULL);
+      size_hints, wm_hints, class_hints);
 
     /* various properties and hints no longer needed; free memory */
     if (pWindowName)
@@ -554,6 +590,8 @@
         XFree(size_hints);
     if (wm_hints)
        XFree(wm_hints);
+    if (class_hints)
+       XFree(class_hints);
 
     XMapWindow(display, window);
 
diff --git a/contrib/gregbook/rpng2-win.c b/contrib/gregbook/rpng2-win.c
index 0c1a9d1..dfbd6b1 100644
--- a/contrib/gregbook/rpng2-win.c
+++ b/contrib/gregbook/rpng2-win.c
@@ -27,16 +27,25 @@
     - 1.10:  enabled "message window"/console (thanks to David Geldreich)
     - 1.20:  added runtime MMX-enabling/disabling and new -mmx* options
     - 1.21:  made minor tweak to usage screen to fit within 25-line console
+    - 1.22:  added AMD64/EM64T support (__x86_64__)
+    - 2.00:  dual-licensed (added GNU GPL)
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -53,11 +62,28 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #define PROGNAME  "rpng2-win"
 #define LONGNAME  "Progressive PNG Viewer for Windows"
-#define VERSION   "1.21 of 29 June 2001"
+#define VERSION   "2.00 of 2 June 2007"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -119,10 +145,9 @@
 LRESULT CALLBACK  rpng2_win_wndproc(HWND, UINT, WPARAM, LPARAM);
 
 
-static char titlebar[1024], *window_name = titlebar;
+static char titlebar[1024];
 static char *progname = PROGNAME;
 static char *appname = LONGNAME;
-static char *icon_name = PROGNAME;    /* GRR:  not (yet) used */
 static char *filename;
 static FILE *infile;
 
@@ -367,7 +392,7 @@
             }
         } else if (!strncmp(*argv, "-timing", 2)) {
             timing = TRUE;
-#if (defined(__i386__) || defined(_M_IX86))
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
         } else if (!strncmp(*argv, "-nommxfilters", 7)) {
             rpng2_info.nommxfilters = TRUE;
         } else if (!strncmp(*argv, "-nommxcombine", 7)) {
@@ -432,7 +457,7 @@
         readpng2_version_info();
         fprintf(stderr, "\n"
           "Usage:  %s [-gamma exp] [-bgcolor bg | -bgpat pat] [-timing]\n"
-#if (defined(__i386__) || defined(_M_IX86))
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
           "        %*s [[-nommxfilters] [-nommxcombine] [-nommxinterlace] | -nommx]\n"
 #endif
           "        %*s file.png\n\n"
@@ -447,17 +472,17 @@
           "\t\t  transparent images; overrides -bgcolor option\n"
           "    -timing\tenables delay for every block read, to simulate modem\n"
           "\t\t  download of image (~36 Kbps)\n"
-#if (defined(__i386__) || defined(_M_IX86))
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
           "    -nommx*\tdisable optimized MMX routines for decoding row filters,\n"
           "\t\t  combining rows, and expanding interlacing, respectively\n"
 #endif
           "\nPress Q, Esc or mouse button 1 after image is displayed to quit.\n"
           "Press Q or Esc to quit this usage screen. ",
           PROGNAME,
-#if (defined(__i386__) || defined(_M_IX86))
-          strlen(PROGNAME), " ",
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
+          (int)strlen(PROGNAME), " ",
 #endif
-          strlen(PROGNAME), " ", default_display_exponent, num_bgpat);
+          (int)strlen(PROGNAME), " ", default_display_exponent, num_bgpat);
         fflush(stderr);
         do
             ch = _getch();
diff --git a/contrib/gregbook/rpng2-x.c b/contrib/gregbook/rpng2-x.c
index 5fc29ea..6bba649 100644
--- a/contrib/gregbook/rpng2-x.c
+++ b/contrib/gregbook/rpng2-x.c
@@ -13,7 +13,8 @@
    and "radial waves" patterns, respectively.
 
    to do:
-    - 8-bit support
+    - fix expose/redraw code:  don't draw entire row if only part exposed
+    - 8-bit (colormapped) X support
     - finish resizable checkerboard-gradient (sizes 4-128?)
     - use %.1023s to simplify truncation of title-bar string?
 
@@ -26,18 +27,32 @@
     - 1.11:  added -usleep option for demos; fixed command-line parsing bug
     - 1.12:  added -pause option for demos and testing
     - 1.20:  added runtime MMX-enabling/disabling and new -mmx* options
-    - 1.21:  fixed small X memory leak (thanks to Francois Petitjean)
-    - 1.22:  fixed XFreeGC() crash bug
+    - 1.21:  fixed some small X memory leaks (thanks to François Petitjean)
+    - 1.22:  fixed XFreeGC() crash bug (thanks to Patrick Welche)
+    - 1.23:  added -bgpat 0 mode (std white/gray checkerboard, 8x8 squares)
+    - 1.30:  added -loop option for -bgpat (ifdef FEATURE_LOOP); fixed bpp =
+              24; added support for X resources (thanks to Gerhard Niklasch)
+    - 1.31:  added code to skip unused chunks (thanks to Glenn Randers-Pehrson)
+    - 1.32:  added AMD64/EM64T support (__x86_64__); added basic expose/redraw
+              handling
+    - 2.00:  dual-licensed (added GNU GPL)
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2001 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -54,14 +69,34 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #define PROGNAME  "rpng2-x"
 #define LONGNAME  "Progressive PNG Viewer for X"
-#define VERSION   "1.22 of 16 August 2001"
+#define VERSION   "2.00 of 2 June 2007"
+#define RESNAME   "rpng2"	/* our X resource application name */
+#define RESCLASS  "Rpng"	/* our X resource class name */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <ctype.h>
 #include <string.h>
 #include <setjmp.h>       /* for jmpbuf declaration in readpng2.h */
 #include <time.h>
@@ -88,6 +123,12 @@
 #define CLIP8P(c)        MAX(0,(MIN((c),255)))   /* 8-bit pos. integer (uch) */
 #define ROUNDF(f)        ((int)(f + 0.5))
 
+#define QUIT(e,k) ((e.type == ButtonPress && e.xbutton.button == Button1) ||  \
+                  (e.type == KeyPress &&   /*  v--- or 1 for shifted keys */  \
+                  ((k = XLookupKeysym(&e.xkey, 0)) == XK_q || k == XK_Escape)))
+
+#define NO_24BIT_MASKS	/* undef case not fully written--only for redisplay() */
+
 #define rgb1_max   bg_freq
 #define rgb1_min   bg_gray
 #define rgb2_max   bg_bsat
@@ -117,18 +158,26 @@
                           *  overhead) */
 
 /* local prototypes */
-static void rpng2_x_init(void);
-static int  rpng2_x_create_window(void);
-static int  rpng2_x_load_bg_image(void);
-static void rpng2_x_display_row(ulg row);
-static void rpng2_x_finish_display(void);
-static void rpng2_x_cleanup(void);
-static int  rpng2_x_msb(ulg u32val);
+static void rpng2_x_init (void);
+static int  rpng2_x_create_window (void);
+static int  rpng2_x_load_bg_image (void);
+static void rpng2_x_display_row (ulg row);
+static void rpng2_x_finish_display (void);
+static void rpng2_x_redisplay_image (ulg startcol, ulg startrow,
+                                     ulg width, ulg height);
+#ifdef FEATURE_LOOP
+static void rpng2_x_reload_bg_image (void);
+static int  is_number (char *p);
+#endif
+static void rpng2_x_cleanup (void);
+static int  rpng2_x_msb (ulg u32val);
 
 
 static char titlebar[1024], *window_name = titlebar;
 static char *appname = LONGNAME;
 static char *icon_name = PROGNAME;
+static char *res_name = RESNAME;
+static char *res_class = RESCLASS;
 static char *filename;
 static FILE *infile;
 
@@ -139,7 +188,7 @@
 
 static int pat = 6;        /* must be less than num_bgpat */
 static int bg_image = 0;
-static int bgscale = 16;
+static int bgscale, bgscale_default = 16;
 static ulg bg_rowbytes;
 static uch *bg_data;
 
@@ -165,7 +214,8 @@
     {255, 127,   0},    /* 12:  orange */
     {192,  96,   0},    /* 13:  darker orange */
     { 24,  60,   0},    /* 14:  dark green-yellow */
-    { 85, 125, 200}     /* 15:  ice blue */
+    { 85, 125, 200},    /* 15:  ice blue */
+    {192, 192, 192}     /* 16:  Netscape/Mosaic gray */
 };
 /* not used for now, but should be for error-checking:
 static int num_rgb = sizeof(rgb) / sizeof(struct rgb_color);
@@ -193,6 +243,7 @@
     int rgb1_max, rgb1_min;     /* or bg_freq, bg_gray */
     int rgb2_max, rgb2_min;     /* or bg_bsat, bg_brot (both scaled by 10)*/
 } bg[] = {
+    {0,     1,1, 16,16},        /* checkered:  white vs. light gray (basic) */
     {0+8,   2,0,  1,15},        /* checkered:  tan/black vs. white/ice blue */
     {0+24,  2,0,  1,0},         /* checkered:  tan/black vs. white/black */
     {0+8,   4,5,  0,2},         /* checkered:  gold/yellow vs. black/tan */
@@ -244,6 +295,10 @@
     int error = 0;
     int timing = FALSE;
     int have_bg = FALSE;
+#ifdef FEATURE_LOOP
+    int loop = FALSE;
+    long loop_interval = -1;		/* seconds (100,000 max) */
+#endif
     double LUT_exponent;                /* just the lookup table */
     double CRT_exponent = 2.2;          /* just the monitor */
     double default_display_exponent;    /* whole display system */
@@ -344,13 +399,12 @@
             if (!*++argv)
                 ++error;
             else {
-                pat = atoi(*argv) - 1;
-                if (pat < 0 || pat >= num_bgpat)
-                    ++error;
-                else {
+                pat = atoi(*argv);
+                if (pat >= 0 && pat < num_bgpat) {
                     bg_image = TRUE;
                     have_bg = FALSE;
-                }
+                } else
+                    ++error;
             }
         } else if (!strncmp(*argv, "-usleep", 2)) {
             if (!*++argv)
@@ -363,7 +417,21 @@
             pause_after_pass = TRUE;
         } else if (!strncmp(*argv, "-timing", 2)) {
             timing = TRUE;
-#if (defined(__i386__) || defined(_M_IX86))
+#ifdef FEATURE_LOOP
+        } else if (!strncmp(*argv, "-loop", 2)) {
+            loop = TRUE;
+            if (!argv[1] || !is_number(argv[1]))
+                loop_interval = 2;
+            else {
+                ++argv;
+                loop_interval = atol(*argv);
+                if (loop_interval < 0)
+                    loop_interval = 2;
+                else if (loop_interval > 100000)   /* bit more than one day */
+                    loop_interval = 100000;
+            }
+#endif
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
         } else if (!strncmp(*argv, "-nommxfilters", 7)) {
             rpng2_info.nommxfilters = TRUE;
         } else if (!strncmp(*argv, "-nommxcombine", 7)) {
@@ -434,10 +502,14 @@
         readpng2_version_info();
         fprintf(stderr, "\n"
           "Usage:  %s [-display xdpy] [-gamma exp] [-bgcolor bg | -bgpat pat]\n"
-#if (defined(__i386__) || defined(_M_IX86))
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
           "        %*s [[-nommxfilters] [-nommxcombine] [-nommxinterlace] | -nommx]\n"
 #endif
+#ifdef FEATURE_LOOP
+          "        %*s [-usleep dur | -timing] [-pause] [-loop [sec]] file.png\n\n"
+#else
           "        %*s [-usleep dur | -timing] [-pause] file.png\n\n"
+#endif
           "    xdpy\tname of the target X display (e.g., ``hostname:0'')\n"
           "    exp \ttransfer-function exponent (``gamma'') of the display\n"
           "\t\t  system in floating-point format (e.g., ``%.1f''); equal\n"
@@ -446,9 +518,14 @@
           "    bg  \tdesired background color in 7-character hex RGB format\n"
           "\t\t  (e.g., ``#ff7700'' for orange:  same as HTML colors);\n"
           "\t\t  used with transparent images; overrides -bgpat\n"
-          "    pat \tdesired background pattern number (1-%d); used with\n"
+          "    pat \tdesired background pattern number (0-%d); used with\n"
           "\t\t  transparent images; overrides -bgcolor\n"
-#if (defined(__i386__) || defined(_M_IX86))
+#ifdef FEATURE_LOOP
+          "    -loop\tloops through background images after initial display\n"
+          "\t\t  is complete (depends on -bgpat)\n"
+          "    sec \tseconds to display each background image (default = 2)\n"
+#endif
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
           "    -nommx*\tdisable optimized MMX routines for decoding row filters,\n"
           "\t\t  combining rows, and expanding interlacing, respectively\n"
 #endif
@@ -460,10 +537,10 @@
           "\nPress Q, Esc or mouse button 1 (within image window, after image\n"
           "is displayed) to quit.\n"
           "\n", PROGNAME,
-#if (defined(__i386__) || defined(_M_IX86))
-          strlen(PROGNAME), " ",
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__))
+          (int)strlen(PROGNAME), " ",
 #endif
-          strlen(PROGNAME), " ", default_display_exponent, num_bgpat);
+          (int)strlen(PROGNAME), " ", default_display_exponent, num_bgpat-1);
         exit(1);
     }
 
@@ -530,13 +607,111 @@
     }
 
 
+#ifdef FEATURE_LOOP
+
+    if (loop && bg_image) {
+        for (;;) {
+            int i, use_sleep;
+            struct timeval now, then;
+
+            /* get current time and add loop_interval to get target time */
+            if (gettimeofday(&then, NULL) == 0) {
+                then.tv_sec += loop_interval;
+                use_sleep = FALSE;
+            } else
+                use_sleep = TRUE;
+
+            /* do quick check for a quit event but don't wait for it */
+            /* GRR BUG:  should also check for Expose events and redraw... */
+            if (XCheckMaskEvent(display, KeyPressMask | ButtonPressMask, &e))
+                if (QUIT(e,k))
+                    break;
+
+            /* generate next background image */
+            if (++pat >= num_bgpat)
+                pat = 0;
+            rpng2_x_reload_bg_image();
+
+            /* wait for timeout, using whatever means are available */
+            if (use_sleep || gettimeofday(&now, NULL) != 0) {
+                for (i = loop_interval;  i > 0;  --i) {
+                    sleep(1);
+                    /* GRR BUG:  also need to check for Expose (and redraw!) */
+                    if (XCheckMaskEvent(display, KeyPressMask | ButtonPressMask,
+                        &e) && QUIT(e,k))
+                        break;
+                }
+            } else {
+                /* Y2038 BUG! */
+                if (now.tv_sec < then.tv_sec ||
+                    (now.tv_sec == then.tv_sec && now.tv_usec < then.tv_usec))
+                {
+                    int quit = FALSE;
+                    long seconds_to_go = then.tv_sec - now.tv_sec;
+                    long usleep_usec;
+
+                    /* basically chew up most of remaining loop-interval with
+                     *  calls to sleep(1) interleaved with checks for quit
+                     *  events, but also recalc time-to-go periodically; when
+                     *  done, clean up any remaining time with usleep() call
+                     *  (could also use SIGALRM, but signals are a pain...) */
+                    while (seconds_to_go-- > 1) {
+                        int seconds_done = 0;
+
+                        for (i = seconds_to_go;  i > 0 && !quit;  --i) {
+                            sleep(1);
+                            /* GRR BUG:  need to check for Expose and redraw */
+                            if (XCheckMaskEvent(display, KeyPressMask |
+                                ButtonPressMask, &e) && QUIT(e,k))
+                                quit = TRUE;
+                            if (++seconds_done > 1000)
+                                break;   /* time to redo seconds_to_go meas. */
+                        }
+                        if (quit)
+                            break;
+
+                        /* done sleeping, or 1000+ seconds since last check:
+                         *  correct the time-to-go measurement for drift */
+                        if (gettimeofday(&now, NULL) == 0) {
+                            if (now.tv_sec >= then.tv_sec)
+                                break;
+                            seconds_to_go = then.tv_sec - now.tv_sec;
+                        } else
+                            ++seconds_to_go;  /* restore what we subtracted */
+                    }
+                    if (quit)
+                        break;   /* breaks outer for-loop, skips redisplay */
+
+                    /* since difference between "now" and "then" is already
+                     *  eaten up to within a couple of seconds, don't need to
+                     *  worry about overflow--but might have overshot (neg.) */
+                    if (gettimeofday(&now, NULL) == 0) {
+                        usleep_usec = 1000000L*(then.tv_sec - now.tv_sec) +
+                          then.tv_usec - now.tv_usec;
+                        if (usleep_usec > 0)
+                            usleep((ulg)usleep_usec);
+                    }
+                }
+            }
+
+            /* composite image against new background and display (note that
+             *  we do not take into account the time spent doing this...) */
+            rpng2_x_redisplay_image (0, 0, rpng2_info.width, rpng2_info.height);
+        }
+
+    } else /* FALL THROUGH and do the normal thing */
+
+#endif /* FEATURE_LOOP */
+
     /* wait for the user to tell us when to quit */
 
-    do
+    do {
         XNextEvent(display, &e);
-    while (!(e.type == ButtonPress && e.xbutton.button == Button1) &&
-           !(e.type == KeyPress &&    /*  v--- or 1 for shifted keys */
-             ((k = XLookupKeysym(&e.xkey, 0)) == XK_q || k == XK_Escape) ));
+        if (e.type == Expose) {
+            XExposeEvent *ex = (XExposeEvent *)&e;
+            rpng2_x_redisplay_image (ex->x, ex->y, ex->width, ex->height);
+        }
+    } while (!QUIT(e,k));
 
 
     /* we're done:  clean up all image and X resources and go away */
@@ -594,7 +769,6 @@
          * If we/it segfault instead, seems like a libpng bug... */
 
         /* we're here via libpng callback, so if window fails, clean and bail */
-printf("readpng2_cleanup.\n");
         readpng2_cleanup(&rpng2_info);
         rpng2_x_cleanup();
         exit(2);
@@ -619,11 +793,12 @@
     XEvent e;
     XGCValues gcvalues;
     XSetWindowAttributes attr;
-    XSizeHints *size_hints;
     XTextProperty windowName, *pWindowName = &windowName;
     XTextProperty iconName, *pIconName = &iconName;
     XVisualInfo visual_info;
+    XSizeHints *size_hints;
     XWMHints *wm_hints;
+    XClassHint *class_hints;
 
 
     Trace((stderr, "beginning rpng2_x_create_window()\n"))
@@ -746,8 +921,13 @@
         wm_hints->flags = StateHint | InputHint  /* | IconPixmapHint */ ;
     }
 
+    if ((class_hints = XAllocClassHint()) != NULL) {
+        class_hints->res_name = res_name;
+        class_hints->res_class = res_class;
+    }
+
     XSetWMProperties(display, window, pWindowName, pIconName, NULL, 0,
-      size_hints, wm_hints, NULL);
+      size_hints, wm_hints, class_hints);
 
     /* various properties and hints no longer needed; free memory */
     if (pWindowName)
@@ -758,6 +938,8 @@
         XFree(size_hints);
     if (wm_hints)
        XFree(wm_hints);
+    if (class_hints)
+       XFree(class_hints);
 
     XMapWindow(display, window);
 
@@ -855,7 +1037,7 @@
     uch r1, r2, g1, g2, b1, b2;
     uch r1_inv, r2_inv, g1_inv, g2_inv, b1_inv, b2_inv;
     int k, hmax, max;
-    int xidx, yidx, yidx_max = (bgscale-1);
+    int xidx, yidx, yidx_max;
     int even_odd_vert, even_odd_horiz, even_odd;
     int invert_gradient2 = (bg[pat].type & 0x08);
     int invert_column;
@@ -877,6 +1059,9 @@
         return 1;
     }
 
+    bgscale = (pat == 0)? 8 : bgscale_default;
+    yidx_max = bgscale - 1;
+
 /*---------------------------------------------------------------------------
     Vertical gradients (ramps) in NxN squares, alternating direction and
     colors (N == bgscale).
@@ -1068,23 +1253,40 @@
 
     if (depth == 24 || depth == 32) {
         ulg red, green, blue;
+        int bpp = ximage->bits_per_pixel;
 
         for (row = 0;  row < rpng2_info.height;  ++row) {
             src = bg_data + row*bg_rowbytes;
             dest = ximage->data + row*ximage_rowbytes;
-            for (i = rpng2_info.width;  i > 0;  --i) {
-                red   = *src++;
-                green = *src++;
-                blue  = *src++;
-                pixel = (red   << RShift) |
-                        (green << GShift) |
-                        (blue  << BShift);
-                /* recall that we set ximage->byte_order = MSBFirst above */
-                /* GRR BUG:  this assumes bpp == 32, but may be 24: */
-                *dest++ = (char)((pixel >> 24) & 0xff);
-                *dest++ = (char)((pixel >> 16) & 0xff);
-                *dest++ = (char)((pixel >>  8) & 0xff);
-                *dest++ = (char)( pixel        & 0xff);
+            if (bpp == 32) {	/* slightly optimized version */
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    red   = *src++;
+                    green = *src++;
+                    blue  = *src++;
+                    pixel = (red   << RShift) |
+                            (green << GShift) |
+                            (blue  << BShift);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    *dest++ = (char)((pixel >> 24) & 0xff);
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
+            } else {
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    red   = *src++;
+                    green = *src++;
+                    blue  = *src++;
+                    pixel = (red   << RShift) |
+                            (green << GShift) |
+                            (blue  << BShift);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    /* GRR BUG?  this assumes bpp == 24 & bits are packed low */
+                    /*           (probably need to use RShift, RMask, etc.) */
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
             }
         }
 
@@ -1155,10 +1357,7 @@
               PROGNAME, prevpass + 1);
             do
                 XNextEvent(display, &e);
-            while (!(e.type == ButtonPress && e.xbutton.button == Button1)
-                   && !(e.type == KeyPress &&
-                   ((k = XLookupKeysym(&e.xkey, 0)) == XK_q
-                    || k == XK_Escape) )) ;
+            while (!QUIT(e,k));
         }
         fprintf(stderr, "%s:  pass %d of 7\r", PROGNAME, rpng2_info.pass + 1);
         fflush(stderr);
@@ -1179,6 +1378,7 @@
 
     if (depth == 24 || depth == 32) {
         ulg red, green, blue;
+        int bpp = ximage->bits_per_pixel;
 
         src = rpng2_info.image_data + row*rpng2_info.rowbytes;
         if (bg_image)
@@ -1193,11 +1393,18 @@
                         (green << GShift) |
                         (blue  << BShift);
                 /* recall that we set ximage->byte_order = MSBFirst above */
-                /* GRR BUG:  this assumes bpp == 32, but may be 24: */
-                *dest++ = (char)((pixel >> 24) & 0xff);
-                *dest++ = (char)((pixel >> 16) & 0xff);
-                *dest++ = (char)((pixel >>  8) & 0xff);
-                *dest++ = (char)( pixel        & 0xff);
+                if (bpp == 32) {
+                    *dest++ = (char)((pixel >> 24) & 0xff);
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                } else {
+                    /* GRR BUG?  this assumes bpp == 24 & bits are packed low */
+                    /*           (probably need to use RShift, RMask, etc.) */
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
             }
         } else /* if (rpng2_info.channels == 4) */ {
             for (i = rpng2_info.width;  i > 0;  --i) {
@@ -1230,11 +1437,18 @@
                         (green << GShift) |
                         (blue  << BShift);
                 /* recall that we set ximage->byte_order = MSBFirst above */
-                /* GRR BUG:  this assumes bpp == 32, but may be 24: */
-                *dest++ = (char)((pixel >> 24) & 0xff);
-                *dest++ = (char)((pixel >> 16) & 0xff);
-                *dest++ = (char)((pixel >>  8) & 0xff);
-                *dest++ = (char)( pixel        & 0xff);
+                if (bpp == 32) {
+                    *dest++ = (char)((pixel >> 24) & 0xff);
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                } else {
+                    /* GRR BUG?  this assumes bpp == 24 & bits are packed low */
+                    /*           (probably need to use RShift, RMask, etc.) */
+                    *dest++ = (char)((pixel >> 16) & 0xff);
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
             }
         }
 
@@ -1352,6 +1566,472 @@
 
 
 
+static void rpng2_x_redisplay_image(ulg startcol, ulg startrow,
+                                    ulg width, ulg height)
+{
+    uch bg_red   = rpng2_info.bg_red;
+    uch bg_green = rpng2_info.bg_green;
+    uch bg_blue  = rpng2_info.bg_blue;
+    uch *src, *src2=NULL;       /* src2 walks bg_data when bg_image is set */
+    char *dest;
+    uch r, g, b, a;
+    ulg i, row, lastrow = 0;    /* lastrow:  first row not yet sent to X */
+    ulg pixel;
+    int ximage_rowbytes = ximage->bytes_per_line;
+    /* NOTE(review):  startcol and width are never referenced below; whole
+     * rows of rpng2_info.width pixels are converted and sent to the window */
+    Trace((stderr, "beginning display loop (image_channels == %d)\n",
+      image_channels))
+    Trace((stderr, "   (width = %ld, rowbytes = %ld, ximage_rowbytes = %d)\n",
+      rpng2_info.width, image_rowbytes, ximage_rowbytes))
+    Trace((stderr, "   (bpp = %d)\n", ximage->bits_per_pixel))
+    Trace((stderr, "   (byte_order = %s)\n", ximage->byte_order == MSBFirst?
+      "MSBFirst" : (ximage->byte_order == LSBFirst? "LSBFirst" : "unknown")))
+
+/*---------------------------------------------------------------------------
+    Re-renders rows [startrow, startrow+height) of the decoded image into
+    ximage, compositing RGBA pixels against the solid background color or
+    bg_data, and pushes them to the window.  Aside from rpng2_info and src2
+    this is stated to match rpng_x_display_image() in the non-progressive
+    viewer:  redisplay happens only after the image is fully decoded.
+  ---------------------------------------------------------------------------*/
+
+    if (depth == 24 || depth == 32) {
+        ulg red, green, blue;
+        int bpp = ximage->bits_per_pixel;
+
+        for (lastrow = row = startrow;  row < startrow+height;  ++row) {
+            src = rpng2_info.image_data + row*rpng2_info.rowbytes;
+            if (bg_image)
+                src2 = bg_data + row*bg_rowbytes;
+            dest = ximage->data + row*ximage_rowbytes;
+            if (rpng2_info.channels == 3) {
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    red   = *src++;
+                    green = *src++;
+                    blue  = *src++;
+#ifdef NO_24BIT_MASKS
+                    pixel = (red   << RShift) |
+                            (green << GShift) |
+                            (blue  << BShift);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    if (bpp == 32) {
+                        *dest++ = (char)((pixel >> 24) & 0xff);
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    } else {
+                        /* this assumes bpp == 24 & bits are packed low */
+                        /* (probably need to use RShift, RMask, etc.) */
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    }
+#else
+                    red   = (RShift < 0)? red   << (-RShift) : red   >> RShift;
+                    green = (GShift < 0)? green << (-GShift) : green >> GShift;
+                    blue  = (BShift < 0)? blue  << (-BShift) : blue  >> BShift;
+                    pixel = (red & RMask) | (green & GMask) | (blue & BMask);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    if (bpp == 32) {
+                        *dest++ = (char)((pixel >> 24) & 0xff);
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    } else {
+                        /* GRR BUG */
+                        /* this assumes bpp == 24 & bits are packed low */
+                        /* (probably need to use RShift/RMask/etc. here, too) */
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    }
+#endif
+                }
+
+            } else /* if (rpng2_info.channels == 4) */ {
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    r = *src++;
+                    g = *src++;
+                    b = *src++;
+                    a = *src++;
+                    if (bg_image) {
+                        bg_red   = *src2++;
+                        bg_green = *src2++;
+                        bg_blue  = *src2++;
+                    }
+                    if (a == 255) {
+                        red   = r;
+                        green = g;
+                        blue  = b;
+                    } else if (a == 0) {
+                        red   = bg_red;
+                        green = bg_green;
+                        blue  = bg_blue;
+                    } else {
+                        /* this macro (from png.h) composites the foreground
+                         * and background values and puts the result into the
+                         * first argument */
+                        alpha_composite(red,   r, a, bg_red);
+                        alpha_composite(green, g, a, bg_green);
+                        alpha_composite(blue,  b, a, bg_blue);
+                    }
+#ifdef NO_24BIT_MASKS
+                    pixel = (red   << RShift) |
+                            (green << GShift) |
+                            (blue  << BShift);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    if (bpp == 32) {
+                        *dest++ = (char)((pixel >> 24) & 0xff);
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    } else {
+                        /* this assumes bpp == 24 & bits are packed low */
+                        /* (probably need to use RShift, RMask, etc.) */
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    }
+#else
+                    red   = (RShift < 0)? red   << (-RShift) : red   >> RShift;
+                    green = (GShift < 0)? green << (-GShift) : green >> GShift;
+                    blue  = (BShift < 0)? blue  << (-BShift) : blue  >> BShift;
+                    pixel = (red & RMask) | (green & GMask) | (blue & BMask);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    if (bpp == 32) {
+                        *dest++ = (char)((pixel >> 24) & 0xff);
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    } else {
+                        /* GRR BUG */
+                        /* this assumes bpp == 24 & bits are packed low */
+                        /* (probably need to use RShift/RMask/etc. here, too) */
+                        *dest++ = (char)((pixel >> 16) & 0xff);
+                        *dest++ = (char)((pixel >>  8) & 0xff);
+                        *dest++ = (char)( pixel        & 0xff);
+                    }
+#endif
+                }
+            }
+            /* push a 16-row band whenever row+1 is a multiple of 16 */
+            if (((row+1) & 0xf) == 0) {
+                XPutImage(display, window, gc, ximage, 0, (int)lastrow, 0,
+                  (int)lastrow, rpng2_info.width, 16);
+                XFlush(display);
+                lastrow = row + 1;
+            }
+        }
+
+    } else if (depth == 16) {
+        ush red, green, blue;
+
+        for (lastrow = row = startrow;  row < startrow+height;  ++row) {
+            src = rpng2_info.row_pointers[row];
+            if (bg_image)
+                src2 = bg_data + row*bg_rowbytes;
+            dest = ximage->data + row*ximage_rowbytes;
+            if (rpng2_info.channels == 3) {
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    red   = ((ush)(*src) << 8);
+                    ++src;
+                    green = ((ush)(*src) << 8);
+                    ++src;
+                    blue  = ((ush)(*src) << 8);
+                    ++src;
+                    pixel = ((red   >> RShift) & RMask) |
+                            ((green >> GShift) & GMask) |
+                            ((blue  >> BShift) & BMask);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
+            } else /* if (rpng2_info.channels == 4) */ {
+                for (i = rpng2_info.width;  i > 0;  --i) {
+                    r = *src++;
+                    g = *src++;
+                    b = *src++;
+                    a = *src++;
+                    if (bg_image) {
+                        bg_red   = *src2++;
+                        bg_green = *src2++;
+                        bg_blue  = *src2++;
+                    }
+                    if (a == 255) {
+                        red   = ((ush)r << 8);
+                        green = ((ush)g << 8);
+                        blue  = ((ush)b << 8);
+                    } else if (a == 0) {
+                        red   = ((ush)bg_red   << 8);
+                        green = ((ush)bg_green << 8);
+                        blue  = ((ush)bg_blue  << 8);
+                    } else {
+                        /* this macro (from png.h) composites the foreground
+                         * and background values and puts the result back into
+                         * the first argument (== fg byte here:  safe) */
+                        alpha_composite(r, r, a, bg_red);
+                        alpha_composite(g, g, a, bg_green);
+                        alpha_composite(b, b, a, bg_blue);
+                        red   = ((ush)r << 8);
+                        green = ((ush)g << 8);
+                        blue  = ((ush)b << 8);
+                    }
+                    pixel = ((red   >> RShift) & RMask) |
+                            ((green >> GShift) & GMask) |
+                            ((blue  >> BShift) & BMask);
+                    /* recall that we set ximage->byte_order = MSBFirst above */
+                    *dest++ = (char)((pixel >>  8) & 0xff);
+                    *dest++ = (char)( pixel        & 0xff);
+                }
+            }
+            /* push a 16-row band whenever row+1 is a multiple of 16 */
+            if (((row+1) & 0xf) == 0) {
+                XPutImage(display, window, gc, ximage, 0, (int)lastrow, 0,
+                  (int)lastrow, rpng2_info.width, 16);
+                XFlush(display);
+                lastrow = row + 1;
+            }
+        }
+
+    } else /* depth == 8 */ {
+
+        /* GRR:  add 8-bit support (no pixels are converted here) */
+
+    }
+
+    Trace((stderr, "calling final XPutImage()\n"))
+    if (lastrow < startrow+height) {   /* sends through image's last row */
+        XPutImage(display, window, gc, ximage, 0, (int)lastrow, 0,
+          (int)lastrow, rpng2_info.width, rpng2_info.height-lastrow);
+        XFlush(display);
+    }
+
+} /* end function rpng2_x_redisplay_image() */
+
+
+
+
+
+#ifdef FEATURE_LOOP
+
+static void rpng2_x_reload_bg_image(void)
+{
+    char *dest;
+    uch r1, r2, g1, g2, b1, b2;
+    uch r1_inv, r2_inv, g1_inv, g2_inv, b1_inv, b2_inv;
+    int k, hmax, max;
+    int xidx, yidx, yidx_max;
+    int even_odd_vert, even_odd_horiz, even_odd;
+    int invert_gradient2 = (bg[pat].type & 0x08);
+    int invert_column;
+    ulg i, row;
+    /* Regenerates the procedural background selected by pat into bg_data,
+     * one RGB byte triple per pixel, with rows bg_rowbytes apart. */
+    bgscale = (pat == 0)? 8 : bgscale_default;  /* pat 0 is fixed at 8 */
+    yidx_max = bgscale - 1;  /* NOTE(review):  divisor; 0 if bgscale == 1 */
+
+/*---------------------------------------------------------------------------
+    Vertical gradients (ramps) in NxN squares, alternating direction and
+    colors (N == bgscale).
+  ---------------------------------------------------------------------------*/
+
+    if ((bg[pat].type & 0x07) == 0) {
+        uch r1_min  = rgb[bg[pat].rgb1_min].r;
+        uch g1_min  = rgb[bg[pat].rgb1_min].g;
+        uch b1_min  = rgb[bg[pat].rgb1_min].b;
+        uch r2_min  = rgb[bg[pat].rgb2_min].r;
+        uch g2_min  = rgb[bg[pat].rgb2_min].g;
+        uch b2_min  = rgb[bg[pat].rgb2_min].b;
+        int r1_diff = rgb[bg[pat].rgb1_max].r - r1_min;
+        int g1_diff = rgb[bg[pat].rgb1_max].g - g1_min;
+        int b1_diff = rgb[bg[pat].rgb1_max].b - b1_min;
+        int r2_diff = rgb[bg[pat].rgb2_max].r - r2_min;
+        int g2_diff = rgb[bg[pat].rgb2_max].g - g2_min;
+        int b2_diff = rgb[bg[pat].rgb2_max].b - b2_min;
+
+        for (row = 0;  row < rpng2_info.height;  ++row) {
+            yidx = (int)(row % bgscale);
+            even_odd_vert = (int)((row / bgscale) & 1);
+
+            r1 = r1_min + (r1_diff * yidx) / yidx_max;
+            g1 = g1_min + (g1_diff * yidx) / yidx_max;
+            b1 = b1_min + (b1_diff * yidx) / yidx_max;
+            r1_inv = r1_min + (r1_diff * (yidx_max-yidx)) / yidx_max;
+            g1_inv = g1_min + (g1_diff * (yidx_max-yidx)) / yidx_max;
+            b1_inv = b1_min + (b1_diff * (yidx_max-yidx)) / yidx_max;
+
+            r2 = r2_min + (r2_diff * yidx) / yidx_max;
+            g2 = g2_min + (g2_diff * yidx) / yidx_max;
+            b2 = b2_min + (b2_diff * yidx) / yidx_max;
+            r2_inv = r2_min + (r2_diff * (yidx_max-yidx)) / yidx_max;
+            g2_inv = g2_min + (g2_diff * (yidx_max-yidx)) / yidx_max;
+            b2_inv = b2_min + (b2_diff * (yidx_max-yidx)) / yidx_max;
+
+            dest = (char *)bg_data + row*bg_rowbytes;
+            for (i = 0;  i < rpng2_info.width;  ++i) {
+                even_odd_horiz = (int)((i / bgscale) & 1);
+                even_odd = even_odd_vert ^ even_odd_horiz;
+                invert_column =
+                  (even_odd_horiz && (bg[pat].type & 0x10));
+                if (even_odd == 0) {        /* gradient #1 */
+                    if (invert_column) {
+                        *dest++ = r1_inv;
+                        *dest++ = g1_inv;
+                        *dest++ = b1_inv;
+                    } else {
+                        *dest++ = r1;
+                        *dest++ = g1;
+                        *dest++ = b1;
+                    }
+                } else {                    /* gradient #2 */
+                    if ((invert_column && invert_gradient2) ||
+                        (!invert_column && !invert_gradient2))
+                    {
+                        *dest++ = r2;       /* not inverted or */
+                        *dest++ = g2;       /*  doubly inverted */
+                        *dest++ = b2;
+                    } else {
+                        *dest++ = r2_inv;
+                        *dest++ = g2_inv;   /* singly inverted */
+                        *dest++ = b2_inv;
+                    }
+                }
+            }
+        }
+
+/*---------------------------------------------------------------------------
+    Soft gradient-diamonds with scale = bgscale.  Code contributed by Adam
+    M. Costello.
+  ---------------------------------------------------------------------------*/
+
+    } else if ((bg[pat].type & 0x07) == 1) {
+
+        hmax = (bgscale-1)/2;   /* half the max weight of a color */
+        max = 2*hmax;           /* max weight (divisor); 0 if bgscale < 3 */
+
+        r1 = rgb[bg[pat].rgb1_max].r;
+        g1 = rgb[bg[pat].rgb1_max].g;
+        b1 = rgb[bg[pat].rgb1_max].b;
+        r2 = rgb[bg[pat].rgb2_max].r;
+        g2 = rgb[bg[pat].rgb2_max].g;
+        b2 = rgb[bg[pat].rgb2_max].b;
+
+        for (row = 0;  row < rpng2_info.height;  ++row) {
+            yidx = (int)(row % bgscale);
+            if (yidx > hmax)
+                yidx = bgscale-1 - yidx;
+            dest = (char *)bg_data + row*bg_rowbytes;
+            for (i = 0;  i < rpng2_info.width;  ++i) {
+                xidx = (int)(i % bgscale);
+                if (xidx > hmax)
+                    xidx = bgscale-1 - xidx;
+                k = xidx + yidx;
+                *dest++ = (k*r1 + (max-k)*r2) / max;
+                *dest++ = (k*g1 + (max-k)*g2) / max;
+                *dest++ = (k*b1 + (max-k)*b2) / max;
+            }
+        }
+
+/*---------------------------------------------------------------------------
+    Radial "starburst" with azimuthal sinusoids; [eventually number of sinu-
+    soids will equal bgscale?].  This one is slow but very cool.  Code con-
+    tributed by Pieter S. van der Meulen (originally in Smalltalk).
+  ---------------------------------------------------------------------------*/
+
+    } else if ((bg[pat].type & 0x07) == 2) {
+        uch ch;
+        int ii, x, y, hw, hh, grayspot;
+        double freq, rotate, saturate, gray, intensity;
+        double angle=0.0, aoffset=0.0, maxDist, dist;
+        double red=0.0, green=0.0, blue=0.0, hue, s, v, f, p, q, t;
+
+        hh = (int)(rpng2_info.height / 2);
+        hw = (int)(rpng2_info.width / 2);
+
+        /* variables for radial waves:
+         *   aoffset:  number of degrees to rotate hue [CURRENTLY NOT USED]
+         *   freq:  number of color beams originating from the center
+         *   grayspot:  size of the graying center area (anti-alias)
+         *   rotate:  rotation of the beams as a function of radius
+         *   saturate:  saturation of beams' shape azimuthally
+         */
+        angle = CLIP(angle, 0.0, 360.0);
+        grayspot = CLIP(bg[pat].bg_gray, 1, (hh + hw));
+        freq = MAX((double)bg[pat].bg_freq, 0.0);
+        saturate = (double)bg[pat].bg_bsat * 0.1;
+        rotate = (double)bg[pat].bg_brot * 0.1;
+        gray = 0.0;
+        intensity = 0.0;
+        maxDist = (double)((hw*hw) + (hh*hh));
+
+        for (row = 0;  row < rpng2_info.height;  ++row) {
+            y = (int)(row - hh);
+            dest = (char *)bg_data + row*bg_rowbytes;
+            for (i = 0;  i < rpng2_info.width;  ++i) {
+                x = (int)(i - hw);
+                angle = (x == 0)? PI_2 : atan((double)y / (double)x);
+                gray = (double)MAX(ABS(y), ABS(x)) / grayspot;
+                gray = MIN(1.0, gray);
+                dist = (double)((x*x) + (y*y)) / maxDist;
+                intensity = cos((angle+(rotate*dist*PI)) * freq) *
+                  gray * saturate;
+                intensity = (MAX(MIN(intensity,1.0),-1.0) + 1.0) * 0.5;
+                hue = (angle + PI) * INV_PI_360 + aoffset;
+                s = gray * ((double)(ABS(x)+ABS(y)) / (double)(hw + hh));
+                s = MIN(MAX(s,0.0), 1.0);
+                v = MIN(MAX(intensity,0.0), 1.0);
+
+                if (s == 0.0) {
+                    ch = (uch)(v * 255.0);
+                    *dest++ = ch;
+                    *dest++ = ch;
+                    *dest++ = ch;
+                } else {
+                    if ((hue < 0.0) || (hue >= 360.0))
+                        hue -= (((int)(hue / 360.0)) * 360.0);
+                    hue /= 60.0;  /* ii = 60-degree sector, f = fraction */
+                    ii = (int)hue;
+                    f = hue - (double)ii;
+                    p = (1.0 - s) * v;
+                    q = (1.0 - (s * f)) * v;
+                    t = (1.0 - (s * (1.0 - f))) * v;
+                    if      (ii == 0) { red = v; green = t; blue = p; }
+                    else if (ii == 1) { red = q; green = v; blue = p; }
+                    else if (ii == 2) { red = p; green = v; blue = t; }
+                    else if (ii == 3) { red = p; green = q; blue = v; }
+                    else if (ii == 4) { red = t; green = p; blue = v; }
+                    else if (ii == 5) { red = v; green = p; blue = q; }
+                    *dest++ = (uch)(red * 255.0);
+                    *dest++ = (uch)(green * 255.0);
+                    *dest++ = (uch)(blue * 255.0);
+                }
+            }
+        }
+    }
+
+} /* end function rpng2_x_reload_bg_image() */
+
+
+
+
+
+static int is_number(char *p)   /* TRUE if p has only digits (or is empty) */
+{
+    while (*p) {
+        if (!isdigit((unsigned char)*p))   /* plain char may be negative */
+            return FALSE;
+        ++p;
+    }
+    return TRUE;
+}
+
+#endif /* FEATURE_LOOP */
+
+
+
+
+
 static void rpng2_x_cleanup(void)
 {
     if (bg_image && bg_data) {
diff --git a/contrib/gregbook/wpng.c b/contrib/gregbook/wpng.c
index d6e8514..a06e352 100644
--- a/contrib/gregbook/wpng.c
+++ b/contrib/gregbook/wpng.c
@@ -5,8 +5,10 @@
    This program converts certain NetPBM binary files (grayscale and RGB,
    maxval = 255) to PNG.  Non-interlaced PNGs are written progressively;
    interlaced PNGs are read and written in one memory-intensive blast.
+
    Thanks to Jean-loup Gailly for providing the necessary trick to read
-   interactive text from the keyboard while stdin is redirected.
+   interactive text from the keyboard while stdin is redirected.  Thanks
+   to Cosmin Truta for Cygwin fixes.
 
    NOTE:  includes provisional support for PNM type "8" (portable alphamap)
           images, presumed to be a 32-bit interleaved RGBA format; no pro-
@@ -24,16 +26,32 @@
     - 1.02:  modified to allow abbreviated options
     - 1.03:  removed extraneous character from usage screen; fixed bug in
               command-line parsing
+    - 1.04:  fixed DOS/OS2/Win32 detection, including partial Cygwin fix
+              (see http://home.att.net/~perlspinr/diffs/GregBook_cygwin.diff)
+    - 2.00:  dual-licensed (added GNU GPL)
+
+        [REPORTED BUG (win32 only):  "contrib/gregbook/wpng.c - cmd line
+         dose not work!  In order to do something useful I needed to redirect
+         both input and output, with cygwin and with bcc32 as well.  Under
+         Linux, the same wpng appears to work fine.  I don't know what is
+         the problem."]
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2000 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -50,16 +68,35 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #define PROGNAME  "wpng"
-#define VERSION   "1.03 of 19 March 2000"
+#define VERSION   "2.00 of 2 June 2007"
 #define APPNAME   "Simple PGM/PPM/PAM to PNG Converter"
 
 #if defined(__MSDOS__) || defined(__OS2__)
 #  define DOS_OS2_W32
-#elif defined(_WIN32) || defined(__WIN32__)
-#  define DOS_OS2_W32
+#elif defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
+#  ifndef __GNUC__   /* treat Win32 native ports of gcc as Unix environments */
+#    define DOS_OS2_W32
+#  endif
 #endif
 
 #include <stdio.h>
diff --git a/contrib/gregbook/writepng.c b/contrib/gregbook/writepng.c
index 6802b12..e6d81ea 100644
--- a/contrib/gregbook/writepng.c
+++ b/contrib/gregbook/writepng.c
@@ -4,13 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2000 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -27,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 
diff --git a/contrib/gregbook/writepng.h b/contrib/gregbook/writepng.h
index 93c3da8..78b966b 100644
--- a/contrib/gregbook/writepng.h
+++ b/contrib/gregbook/writepng.h
@@ -4,13 +4,20 @@
 
   ---------------------------------------------------------------------------
 
-      Copyright (c) 1998-2000 Greg Roelofs.  All rights reserved.
+      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
 
       This software is provided "as is," without warranty of any kind,
       express or implied.  In no event shall the author or contributors
       be held liable for any damages arising in any way from the use of
       this software.
 
+      The contents of this file are DUAL-LICENSED.  You may modify and/or
+      redistribute this software according to the terms of one of the
+      following two licenses (at your option):
+
+
+      LICENSE 1 ("BSD-like with advertising clause"):
+
       Permission is granted to anyone to use this software for any purpose,
       including commercial applications, and to alter it and redistribute
       it freely, subject to the following restrictions:
@@ -27,6 +34,23 @@
             and contributors for the book, "PNG: The Definitive Guide,"
             published by O'Reilly and Associates.
 
+
+      LICENSE 2 (GNU GPL v2 or later):
+
+      This program is free software; you can redistribute it and/or modify
+      it under the terms of the GNU General Public License as published by
+      the Free Software Foundation; either version 2 of the License, or
+      (at your option) any later version.
+
+      This program is distributed in the hope that it will be useful,
+      but WITHOUT ANY WARRANTY; without even the implied warranty of
+      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+      GNU General Public License for more details.
+
+      You should have received a copy of the GNU General Public License
+      along with this program; if not, write to the Free Software Foundation,
+      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
   ---------------------------------------------------------------------------*/
 
 #ifndef TRUE
diff --git a/libpng-1.0.26.txt b/libpng-1.0.27rc1.txt
similarity index 99%
rename from libpng-1.0.26.txt
rename to libpng-1.0.27rc1.txt
index 1756d9f..4f3e8d9 100644
--- a/libpng-1.0.26.txt
+++ b/libpng-1.0.27rc1.txt
@@ -1,6 +1,6 @@
 libpng.txt - A description on how to use and modify libpng
 
- libpng version 1.0.26 - May 15, 2007
+ libpng version 1.0.27rc1 - July 31, 2007
  Updated and distributed by Glenn Randers-Pehrson
  <glennrp at users.sourceforge.net>
  Copyright (c) 1998-2007 Glenn Randers-Pehrson
@@ -2766,13 +2766,13 @@
 
 VIII. Y2K Compliance in libpng
 
-May 15, 2007
+July 31, 2007
 
 Since the PNG Development group is an ad-hoc body, we can't make
 an official declaration.
 
 This is your unofficial assurance that libpng from version 0.71 and
-upward through 1.0.26 are Y2K compliant.  It is my belief that earlier
+upward through 1.0.27rc1 are Y2K compliant.  It is my belief that earlier
 versions were also Y2K compliant.
 
 Libpng only has three year fields.  One is a 2-byte unsigned integer that
diff --git a/libpng.3 b/libpng.3
index 3ba8bdf..e7d05ec 100644
--- a/libpng.3
+++ b/libpng.3
@@ -1,6 +1,6 @@
-.TH LIBPNG 3 "May 15, 2007"
+.TH LIBPNG 3 "July 31, 2007"
 .SH NAME
-libpng \- Portable Network Graphics (PNG) Reference Library 1.0.26
+libpng \- Portable Network Graphics (PNG) Reference Library 1.0.27rc1
 .SH SYNOPSIS
 \fB
 #include <png.h>\fP
@@ -410,7 +410,7 @@
 .SH LIBPNG.TXT
 libpng.txt - A description on how to use and modify libpng
 
- libpng version 1.0.26 - May 15, 2007
+ libpng version 1.0.27rc1 - July 31, 2007
  Updated and distributed by Glenn Randers-Pehrson
  <glennrp at users.sourceforge.net>
  Copyright (c) 1998-2007 Glenn Randers-Pehrson
@@ -3176,13 +3176,13 @@
 
 .SH VIII. Y2K Compliance in libpng
 
-May 15, 2007
+July 31, 2007
 
 Since the PNG Development group is an ad-hoc body, we can't make
 an official declaration.
 
 This is your unofficial assurance that libpng from version 0.71 and
-upward through 1.0.26 are Y2K compliant.  It is my belief that earlier
+upward through 1.0.27rc1 are Y2K compliant.  It is my belief that earlier
 versions were also Y2K compliant.
 
 Libpng only has three year fields.  One is a 2-byte unsigned integer that
@@ -3371,6 +3371,9 @@
  1.2.17              13    10217  12.so.0.17[.0]
  1.0.26              10    10026  10.so.0.26[.0]
  1.2.18              13    10218  12.so.0.18[.0]
+ 1.2.19beta1-31      13    10219  12.so.0.19[.0]
+ 1.0.27rc1           10    10027  10.so.0.27[.0]
+ 1.2.19rc1           13    10219  12.so.0.19[.0]
 
 Henceforth the source version will match the shared-library minor
 and patch numbers; the shared-library major version number will be
@@ -3383,7 +3386,7 @@
 release number plus "betaNN" or "rcN".
 
 .SH "SEE ALSO"
-libpngpf(3), png(5)
+.IR libpngpf(3) ", " png(5)
 .LP
 .IR libpng :
 .IP
@@ -3426,7 +3429,7 @@
 
 Thanks to Frank J. T. Wojcik for helping with the documentation.
 
-Libpng version 1.0.26 - May 15, 2007:
+Libpng version 1.0.27rc1 - July 31, 2007:
 Initially created in 1995 by Guy Eric Schalnat, then of Group 42, Inc.
 Currently maintained by Glenn Randers-Pehrson (glennrp at users.sourceforge.net).
 
@@ -3447,7 +3450,7 @@
 If you modify libpng you may insert additional notices immediately following
 this sentence.
 
-libpng versions 1.2.6, August 15, 2004, through 1.0.26, May 15, 2007, are
+libpng versions 1.2.6, August 15, 2004, through 1.0.27rc1, July 31, 2007, are
 Copyright (c) 2004,2006-2007 Glenn Randers-Pehrson, and are
 distributed according to the same disclaimer and license as libpng-1.2.5
 with the following individual added to the list of Contributing Authors
@@ -3546,7 +3549,7 @@
 
 Glenn Randers-Pehrson
 glennrp at users.sourceforge.net
-May 15, 2007
+July 31, 2007
 
 .\" end of man page
 
diff --git a/libpngpf.3 b/libpngpf.3
index 9ddc87c..42cec6f 100644
--- a/libpngpf.3
+++ b/libpngpf.3
@@ -1,6 +1,6 @@
-.TH LIBPNGPF 3 "May 15, 2007"
+.TH LIBPNGPF 3 "July 31, 2007"
 .SH NAME
-libpng \- Portable Network Graphics (PNG) Reference Library 1.0.26
+libpng \- Portable Network Graphics (PNG) Reference Library 1.0.27rc1
 (private functions)
 .SH SYNOPSIS
 \fB#include <png.h>\fP
@@ -269,6 +269,6 @@
 See png.h for more information on these functions.
 
 .SH SEE ALSO
-libpng(3), png(5)
+.IR libpng(3) ", " png(5)
 .SH AUTHOR
 Glenn Randers-Pehrson
diff --git a/png.5 b/png.5
index bf6c6fd..c851d14 100644
--- a/png.5
+++ b/png.5
@@ -1,4 +1,4 @@
-.TH PNG 5 "May 15, 2007"
+.TH PNG 5 "July 31, 2007"
 .SH NAME
 png \- Portable Network Graphics (PNG) format
 .SH DESCRIPTION
@@ -18,7 +18,7 @@
 platforms.
 
 .SH "SEE ALSO"
-.IR libpng(3), zlib(3), deflate(5), and zlib(5)
+.IR libpng(3) ", " zlib(3) ", " deflate(5) ", and " zlib(5)
 .LP
 PNG specification (second edition), November 2003:
 .IP
diff --git a/png.c b/png.c
index b62b9f2..478b655 100644
--- a/png.c
+++ b/png.c
@@ -1,7 +1,7 @@
 
 /* png.c - location for general purpose libpng functions
  *
- * Last changed in libpng 1.2.17 May 15, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -13,20 +13,20 @@
 #include "png.h"
 
 /* Generate a compiler error if there is an old png.h in the search path. */
-typedef version_1_0_26 Your_png_h_is_not_version_1_0_26;
+typedef version_1_0_27rc1 Your_png_h_is_not_version_1_0_27rc1;
 
 /* Version information for C files.  This had better match the version
  * string defined in png.h.  */
 
 #ifdef PNG_USE_GLOBAL_ARRAYS
 /* png_libpng_ver was changed to a function in version 1.0.5c */
-const char png_libpng_ver[18] = PNG_LIBPNG_VER_STRING;
+PNG_CONST char png_libpng_ver[18] = PNG_LIBPNG_VER_STRING;
 
 #ifdef PNG_READ_SUPPORTED
 
 /* png_sig was changed to a function in version 1.0.5c */
 /* Place to hold the signature string for a PNG file. */
-const png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+PNG_CONST png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
 #endif /* PNG_READ_SUPPORTED */
 
 /* Invoke global declarations for constant strings for known chunk types */
@@ -56,32 +56,32 @@
 /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
 
 /* start of interlace block */
-const int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
 
 /* offset to next interlace block */
-const int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
 
 /* start of interlace block in the y direction */
-const int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
 
 /* offset to next interlace block in the y direction */
-const int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
 
 /* width of interlace block (used in assembler routines only) */
-#ifdef PNG_HAVE_MMX_COMBINE_ROW
-const int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+#if defined(PNG_HAVE_MMX_COMBINE_ROW) || defined(PNG_OPTIMIZED_CODE_SUPPORTED)
+PNG_CONST int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
 #endif
 
 /* Height of interlace block.  This is not currently used - if you need
  * it, uncomment it here and in png.h
-const int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
 */
 
 /* Mask to determine which pixels are valid in a pass */
-const int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+PNG_CONST int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
 
 /* Mask to determine which pixels to overwrite while displaying */
-const int FARDATA png_pass_dsp_mask[]
+PNG_CONST int FARDATA png_pass_dsp_mask[]
    = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
 
 #endif /* PNG_READ_SUPPORTED */
@@ -134,7 +134,7 @@
 #if defined(PNG_1_0_X) || defined(PNG_1_2_X)
 /* (Obsolete) function to check signature bytes.  It does not allow one
  * to check a partial signature.  This function might be removed in the
- * future - use png_sig_cmp().  Returns true (nonzero) if the file is a PNG.
+ * future - use png_sig_cmp().  Returns true (nonzero) if the file is PNG.
  */
 int PNGAPI
 png_check_sig(png_bytep sig, int num)
@@ -674,7 +674,7 @@
 #ifdef USE_FAR_KEYWORD
    {
       char near_time_buf[29];
-      sprintf(near_time_buf, "%d %s %d %02d:%02d:%02d +0000",
+      png_snprintf6(near_time_buf,29,"%d %s %d %02d:%02d:%02d +0000",
           ptime->day % 32, short_months[(ptime->month - 1) % 12],
           ptime->year, ptime->hour % 24, ptime->minute % 60,
           ptime->second % 61);
@@ -682,7 +682,7 @@
           29*png_sizeof(char));
    }
 #else
-   sprintf(png_ptr->time_buffer, "%d %s %d %02d:%02d:%02d +0000",
+   png_snprintf6(png_ptr->time_buffer,29,"%d %s %d %02d:%02d:%02d +0000",
        ptime->day % 32, short_months[(ptime->month - 1) % 12],
        ptime->year, ptime->hour % 24, ptime->minute % 60,
        ptime->second % 61);
@@ -705,12 +705,11 @@
 png_charp PNGAPI
 png_get_copyright(png_structp png_ptr)
 {
-   if (&png_ptr != NULL)  /* silence compiler warning about unused png_ptr */
-   return ((png_charp) "\n libpng version 1.0.26 - May 15, 2007\n\
+   png_ptr = png_ptr;  /* silence compiler warning about unused png_ptr */
+   return ((png_charp) "\n libpng version 1.0.27rc1 - July 31, 2007\n\
    Copyright (c) 1998-2007 Glenn Randers-Pehrson\n\
    Copyright (c) 1996-1997 Andreas Dilger\n\
    Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.\n");
-   return ((png_charp) "");
 }
 
 /* The following return the library version as a short string in the
@@ -725,27 +724,70 @@
 png_get_libpng_ver(png_structp png_ptr)
 {
    /* Version of *.c files used when building libpng */
-   if (&png_ptr != NULL)  /* silence compiler warning about unused png_ptr */
-      return ((png_charp) PNG_LIBPNG_VER_STRING);
-   return ((png_charp) "");
+   png_ptr = png_ptr;  /* silence compiler warning about unused png_ptr */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
 }
 
 png_charp PNGAPI
 png_get_header_ver(png_structp png_ptr)
 {
    /* Version of *.h files used when building libpng */
-   if (&png_ptr != NULL)  /* silence compiler warning about unused png_ptr */
-      return ((png_charp) PNG_LIBPNG_VER_STRING);
-   return ((png_charp) "");
+   png_ptr = png_ptr;  /* silence compiler warning about unused png_ptr */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
 }
 
 png_charp PNGAPI
 png_get_header_version(png_structp png_ptr)
 {
    /* Returns longer string containing both version and date */
-   if (&png_ptr != NULL)  /* silence compiler warning about unused png_ptr */
-      return ((png_charp) PNG_HEADER_VERSION_STRING);
-   return ((png_charp) "");
+   png_ptr = png_ptr;  /* silence compiler warning about unused png_ptr */
+   return ((png_charp) PNG_HEADER_VERSION_STRING
+#ifdef PNG_READ_SUPPORTED
+#  ifdef PNG_USE_PNGGCCRD
+#    ifdef __x86_64__
+#      ifdef __PIC__
+   "     (PNGGCRD x86_64, PIC)\n"
+#      else
+#        ifdef PNG_THREAD_UNSAFE_OK
+   "     (PNGGCRD x86_64, Thread unsafe)\n"
+#        else
+   "     (PNGGCRD x86_64, Thread safe)\n"
+#        endif
+#      endif
+#    else
+#    ifdef PNG_THREAD_UNSAFE_OK
+   "     (PNGGCRD, Thread unsafe)\n"
+#      else
+   "     (PNGGCRD, Thread safe)\n"
+#      endif
+#    endif
+#  else
+#    ifdef PNG_USE_PNGVCRD
+#      ifdef __x86_64__
+   "     (x86_64 PNGVCRD)\n"
+#      else
+   "     (PNGVCRD)\n"
+#      endif
+#    else
+#      ifdef __x86_64__
+#        ifdef PNG_OPTIMIZED_CODE_SUPPORTED
+   "     (x86_64 OPTIMIZED)\n"
+#        else
+   "     (x86_64 NOT OPTIMIZED)\n"
+#        endif
+#      else
+#        ifdef PNG_OPTIMIZED_CODE_SUPPORTED
+   "     (OPTIMIZED)\n"
+#        else
+   "     (NOT OPTIMIZED)\n"
+#        endif
+#      endif
+#    endif
+#  endif
+#else
+   "     (NO READ SUPPORT)\n"
+#endif
+   );
 }
 
 #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
@@ -756,7 +798,7 @@
    /* check chunk_name and return "keep" value if it's on the list, else 0 */
    int i;
    png_bytep p;
-   if((png_ptr == NULL && chunk_name == NULL) || png_ptr->num_chunk_list<=0)
+   if(png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list<=0)
       return 0;
    p=png_ptr->chunk_list+png_ptr->num_chunk_list*5-5;
    for (i = png_ptr->num_chunk_list; i; i--, p-=5)
@@ -843,8 +885,8 @@
     return -1;
 }
 #endif
-#endif /* PNG_1_0_X  && PNG_ASSEMBLER_CODE_SUPPORTED */
-#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_1_0_X */
+#endif /* PNG_READ_SUPPORTED && PNG_ASSEMBLER_CODE_SUPPORTED */
 
 #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
 #ifdef PNG_SIZE_T
diff --git a/png.h b/png.h
index 97aa781..6172366 100644
--- a/png.h
+++ b/png.h
@@ -1,7 +1,7 @@
 
 /* png.h - header file for PNG reference library
  *
- * libpng version 1.0.26 - May 15, 2007
+ * libpng version 1.0.27rc1 - July 31, 2007
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
@@ -9,7 +9,7 @@
  * Authors and maintainers:
  *  libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
  *  libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
- *  libpng versions 0.97, January 1998, through 1.0.26 - May 15, 2007: Glenn
+ *  libpng versions 0.97, January 1998, through 1.0.27rc1 - July 31, 2007: Glenn
  *  See also "Contributing Authors", below.
  *
  * Note about libpng version numbers:
@@ -150,6 +150,9 @@
  *    1.2.17                  13    10217  12.so.0.17[.0]
  *    1.0.26                  10    10026  10.so.0.26[.0]
  *    1.2.18                  13    10218  12.so.0.18[.0]
+ *    1.2.19beta1-31          13    10219  12.so.0.19[.0]
+ *    1.0.27rc1               10    10027  10.so.0.27[.0]
+ *    1.2.19rc1               13    10219  12.so.0.19[.0]
  *
  *    Henceforth the source version will match the shared-library major
  *    and minor numbers; the shared-library major version number will be
@@ -179,7 +182,7 @@
  * If you modify libpng you may insert additional notices immediately following
  * this sentence.
  *
- * libpng versions 1.2.6, August 15, 2004, through 1.0.26, May 15, 2007, are
+ * libpng versions 1.2.6, August 15, 2004, through 1.0.27rc1, July 31, 2007, are
  * Copyright (c) 2004, 2006-2007 Glenn Randers-Pehrson, and are
  * distributed according to the same disclaimer and license as libpng-1.2.5
  * with the following individual added to the list of Contributing Authors:
@@ -291,13 +294,13 @@
  * Y2K compliance in libpng:
  * =========================
  *
- *    May 15, 2007
+ *    July 31, 2007
  *
  *    Since the PNG Development group is an ad-hoc body, we can't make
  *    an official declaration.
  *
  *    This is your unofficial assurance that libpng from version 0.71 and
- *    upward through 1.0.26 are Y2K compliant.  It is my belief that earlier
+ *    upward through 1.0.27rc1 are Y2K compliant.  It is my belief that earlier
  *    versions were also Y2K compliant.
  *
  *    Libpng only has three year fields.  One is a 2-byte unsigned integer
@@ -353,9 +356,9 @@
  */
 
 /* Version information for png.h - this should match the version in png.c */
-#define PNG_LIBPNG_VER_STRING "1.0.26"
+#define PNG_LIBPNG_VER_STRING "1.0.27rc1"
 #define PNG_HEADER_VERSION_STRING \
-   " libpng version 1.0.26 - May 15, 2007 (header)\n"
+   " libpng version 1.0.27rc1 - July 31, 2007\n"
 
 #define PNG_LIBPNG_VER_SONUM   0
 #define PNG_LIBPNG_VER_DLLNUM  10
@@ -363,11 +366,11 @@
 /* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
 #define PNG_LIBPNG_VER_MAJOR   1
 #define PNG_LIBPNG_VER_MINOR   0
-#define PNG_LIBPNG_VER_RELEASE 26
+#define PNG_LIBPNG_VER_RELEASE 27
 /* This should match the numeric part of the final component of
  * PNG_LIBPNG_VER_STRING, omitting any leading zero: */
 
-#define PNG_LIBPNG_VER_BUILD  0
+#define PNG_LIBPNG_VER_BUILD  1
 
 /* Release Status */
 #define PNG_LIBPNG_BUILD_ALPHA    1
@@ -384,14 +387,14 @@
 #define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
                                        PNG_LIBPNG_BUILD_PRIVATE */
 
-#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
+#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_RC
 
 /* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
  * We must not include leading zeros.
  * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only
  * version 1.0.0 was mis-numbered 100 instead of 10000).  From
  * version 1.0.1 it's    xxyyzz, where x=major, y=minor, z=release */
-#define PNG_LIBPNG_VER 10026 /* 1.0.26 */
+#define PNG_LIBPNG_VER 10027 /* 1.0.27 */
 
 #ifndef PNG_VERSION_INFO_ONLY
 /* include the compression library's header */
@@ -479,7 +482,7 @@
  * the version above.
  */
 #ifdef PNG_USE_GLOBAL_ARRAYS
-PNG_EXPORT_VAR (const char) png_libpng_ver[18];
+PNG_EXPORT_VAR (PNG_CONST char) png_libpng_ver[18];
   /* need room for 99.99.99beta99z */
 #else
 #define png_libpng_ver png_get_header_ver(NULL)
@@ -488,17 +491,17 @@
 #ifdef PNG_USE_GLOBAL_ARRAYS
 /* This was removed in version 1.0.5c */
 /* Structures to facilitate easy interlacing.  See png.c for more details */
-PNG_EXPORT_VAR (const int FARDATA) png_pass_start[7];
-PNG_EXPORT_VAR (const int FARDATA) png_pass_inc[7];
-PNG_EXPORT_VAR (const int FARDATA) png_pass_ystart[7];
-PNG_EXPORT_VAR (const int FARDATA) png_pass_yinc[7];
-PNG_EXPORT_VAR (const int FARDATA) png_pass_mask[7];
-PNG_EXPORT_VAR (const int FARDATA) png_pass_dsp_mask[7];
-#ifdef PNG_USE_PNGGCCRD
-PNG_EXPORT_VAR (const int FARDATA) png_pass_width[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_start[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_inc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_ystart[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_yinc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_mask[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_dsp_mask[7];
+#if defined(PNG_HAVE_MMX_COMBINE_ROW) || defined(PNG_OPTIMIZED_CODE_SUPPORTED)
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_width[7];
 #endif
 /* This isn't currently used.  If you need it, see png.c for more details.
-PNG_EXPORT_VAR (const int FARDATA) png_pass_height[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_height[7];
 */
 #endif
 
@@ -1352,7 +1355,7 @@
    png_byte filter_type;
 #endif
 
-#if defined(PNG_1_0_X) || (defined(PNG_DEBUG) && defined(PNG_USE_PNGGCCRD))
+#if defined(PNG_1_0_X)
 /* New member added in libpng-1.0.10, ifdef'ed out in 1.2.0 */
    png_uint_32 row_buf_size;
 #endif
@@ -1406,7 +1409,7 @@
 /* This triggers a compiler error in png.c, if png.c and png.h
  * do not agree upon the version number.
  */
-typedef png_structp version_1_0_26;
+typedef png_structp version_1_0_27rc1;
 
 typedef png_struct FAR * FAR * png_structpp;
 
@@ -2062,13 +2065,17 @@
 extern PNG_EXPORT(void,png_chunk_error) PNGARG((png_structp png_ptr,
    png_const_charp error_message));
 
+#ifndef PNG_NO_WARNINGS
 /* Non-fatal error in libpng.  Can continue, but may have a problem. */
 extern PNG_EXPORT(void,png_warning) PNGARG((png_structp png_ptr,
    png_const_charp warning_message));
 
+#ifdef PNG_READ_SUPPORTED
 /* Non-fatal error in libpng, chunk name is prepended to message. */
 extern PNG_EXPORT(void,png_chunk_warning) PNGARG((png_structp png_ptr,
    png_const_charp warning_message));
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_NO_WARNINGS */
 
 /* The png_set_<chunk> functions are for storing values in the png_info_struct.
  * Similarly, the png_get_<chunk> calls are used to read values from the
@@ -2817,7 +2824,7 @@
 #if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
 /* place to hold the signature string for a PNG file. */
 #ifdef PNG_USE_GLOBAL_ARRAYS
-   PNG_EXPORT_VAR (const png_byte FARDATA) png_sig[8];
+   PNG_EXPORT_VAR (PNG_CONST png_byte FARDATA) png_sig[8];
 #else
 #if 0
 #define png_sig png_sig_bytes(NULL)
@@ -2829,50 +2836,50 @@
  * define the name here, and add an invocation of the macro in png.c and
  * wherever it's needed.
  */
-#define PNG_IHDR const png_byte png_IHDR[5] = { 73,  72,  68,  82, '\0'}
-#define PNG_IDAT const png_byte png_IDAT[5] = { 73,  68,  65,  84, '\0'}
-#define PNG_IEND const png_byte png_IEND[5] = { 73,  69,  78,  68, '\0'}
-#define PNG_PLTE const png_byte png_PLTE[5] = { 80,  76,  84,  69, '\0'}
-#define PNG_bKGD const png_byte png_bKGD[5] = { 98,  75,  71,  68, '\0'}
-#define PNG_cHRM const png_byte png_cHRM[5] = { 99,  72,  82,  77, '\0'}
-#define PNG_gAMA const png_byte png_gAMA[5] = {103,  65,  77,  65, '\0'}
-#define PNG_hIST const png_byte png_hIST[5] = {104,  73,  83,  84, '\0'}
-#define PNG_iCCP const png_byte png_iCCP[5] = {105,  67,  67,  80, '\0'}
-#define PNG_iTXt const png_byte png_iTXt[5] = {105,  84,  88, 116, '\0'}
-#define PNG_oFFs const png_byte png_oFFs[5] = {111,  70,  70, 115, '\0'}
-#define PNG_pCAL const png_byte png_pCAL[5] = {112,  67,  65,  76, '\0'}
-#define PNG_sCAL const png_byte png_sCAL[5] = {115,  67,  65,  76, '\0'}
-#define PNG_pHYs const png_byte png_pHYs[5] = {112,  72,  89, 115, '\0'}
-#define PNG_sBIT const png_byte png_sBIT[5] = {115,  66,  73,  84, '\0'}
-#define PNG_sPLT const png_byte png_sPLT[5] = {115,  80,  76,  84, '\0'}
-#define PNG_sRGB const png_byte png_sRGB[5] = {115,  82,  71,  66, '\0'}
-#define PNG_tEXt const png_byte png_tEXt[5] = {116,  69,  88, 116, '\0'}
-#define PNG_tIME const png_byte png_tIME[5] = {116,  73,  77,  69, '\0'}
-#define PNG_tRNS const png_byte png_tRNS[5] = {116,  82,  78,  83, '\0'}
-#define PNG_zTXt const png_byte png_zTXt[5] = {122,  84,  88, 116, '\0'}
+#define PNG_IHDR png_byte png_IHDR[5] = { 73,  72,  68,  82, '\0'}
+#define PNG_IDAT png_byte png_IDAT[5] = { 73,  68,  65,  84, '\0'}
+#define PNG_IEND png_byte png_IEND[5] = { 73,  69,  78,  68, '\0'}
+#define PNG_PLTE png_byte png_PLTE[5] = { 80,  76,  84,  69, '\0'}
+#define PNG_bKGD png_byte png_bKGD[5] = { 98,  75,  71,  68, '\0'}
+#define PNG_cHRM png_byte png_cHRM[5] = { 99,  72,  82,  77, '\0'}
+#define PNG_gAMA png_byte png_gAMA[5] = {103,  65,  77,  65, '\0'}
+#define PNG_hIST png_byte png_hIST[5] = {104,  73,  83,  84, '\0'}
+#define PNG_iCCP png_byte png_iCCP[5] = {105,  67,  67,  80, '\0'}
+#define PNG_iTXt png_byte png_iTXt[5] = {105,  84,  88, 116, '\0'}
+#define PNG_oFFs png_byte png_oFFs[5] = {111,  70,  70, 115, '\0'}
+#define PNG_pCAL png_byte png_pCAL[5] = {112,  67,  65,  76, '\0'}
+#define PNG_sCAL png_byte png_sCAL[5] = {115,  67,  65,  76, '\0'}
+#define PNG_pHYs png_byte png_pHYs[5] = {112,  72,  89, 115, '\0'}
+#define PNG_sBIT png_byte png_sBIT[5] = {115,  66,  73,  84, '\0'}
+#define PNG_sPLT png_byte png_sPLT[5] = {115,  80,  76,  84, '\0'}
+#define PNG_sRGB png_byte png_sRGB[5] = {115,  82,  71,  66, '\0'}
+#define PNG_tEXt png_byte png_tEXt[5] = {116,  69,  88, 116, '\0'}
+#define PNG_tIME png_byte png_tIME[5] = {116,  73,  77,  69, '\0'}
+#define PNG_tRNS png_byte png_tRNS[5] = {116,  82,  78,  83, '\0'}
+#define PNG_zTXt png_byte png_zTXt[5] = {122,  84,  88, 116, '\0'}
 
 #ifdef PNG_USE_GLOBAL_ARRAYS
-PNG_EXPORT_VAR (const png_byte FARDATA) png_IHDR[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_IDAT[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_IEND[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_PLTE[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_bKGD[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_cHRM[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_gAMA[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_hIST[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_iCCP[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_iTXt[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_oFFs[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_pCAL[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_sCAL[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_pHYs[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_sBIT[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_sPLT[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_sRGB[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_tEXt[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_tIME[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_tRNS[5];
-PNG_EXPORT_VAR (const png_byte FARDATA) png_zTXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IHDR[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IDAT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IEND[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_PLTE[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_bKGD[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_cHRM[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_gAMA[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_hIST[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iCCP[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iTXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_oFFs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pHYs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sBIT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sPLT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sRGB[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tEXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tIME[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tRNS[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_zTXt[5];
 #endif /* PNG_USE_GLOBAL_ARRAYS */
 
 #if defined(PNG_1_0_X) || defined (PNG_1_2_X)
diff --git a/pngconf.h b/pngconf.h
index 35d6316..8df01a2 100644
--- a/pngconf.h
+++ b/pngconf.h
@@ -1,7 +1,7 @@
 
 /* pngconf.h - machine configurable file for libpng
  *
- * libpng version 1.0.26 - May 15, 2007
+ * libpng version 1.0.27rc1 - July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -270,6 +270,7 @@
 #  define PNGARG(arglist) arglist
 #endif /* _NO_PROTO */
 
+
 #endif /* OF */
 
 #endif /* PNGARG */
@@ -588,16 +589,16 @@
 #endif /* PNG_READ_TRANSFORMS_SUPPORTED */
 
 #if !defined(PNG_NO_PROGRESSIVE_READ) && \
- !defined(PNG_PROGRESSIVE_READ_NOT_SUPPORTED)  /* if you don't do progressive */
-#  define PNG_PROGRESSIVE_READ_SUPPORTED     /* reading.  This is not talking */
-#endif                               /* about interlacing capability!  You'll */
-              /* still have interlacing unless you change the following line: */
+ !defined(PNG_PROGRESSIVE_READ_NOT_SUPPORTED) /* if you don't do progressive */
+#  define PNG_PROGRESSIVE_READ_SUPPORTED    /* reading.  This is not talking */
+#endif                              /* about interlacing capability!  You'll */
+             /* still have interlacing unless you change the following line: */
 
-#define PNG_READ_INTERLACING_SUPPORTED /* required for PNG-compliant decoders */
+#define PNG_READ_INTERLACING_SUPPORTED /* required in PNG-compliant decoders */
 
 #ifndef PNG_NO_READ_COMPOSITE_NODIV
 #  ifndef PNG_NO_READ_COMPOSITED_NODIV  /* libpng-1.0.x misspelling */
-#    define PNG_READ_COMPOSITE_NODIV_SUPPORTED   /* well tested on Intel, SGI */
+#    define PNG_READ_COMPOSITE_NODIV_SUPPORTED  /* well tested on Intel, SGI */
 #  endif
 #endif
 
@@ -717,35 +718,46 @@
 /* PNG_ASSEMBLER_CODE was enabled by default in version 1.2.0 
  * even when PNG_USE_PNGVCRD or PNG_USE_PNGGCCRD is not defined.
  *
- * PNG_NO_ASSEMBLER_CODE disables use of all assembler code and optimized C,
- * and removes or includes several functions in the API.
+ * PNG_NO_ASSEMBLER_CODE disables use of all assembler code,
+ * and removes several functions from the API.
  *
  * PNG_NO_MMX_CODE disables the use of MMX code without changing the API.
- * When MMX code is off, then optimized C replacement functions are used.
+ * When MMX code is off, then optimized C replacement functions are used,
+ * if PNG_NO_OPTIMIZED_CODE is not defined.  This was added in version
+ * 1.2.19.
 */
+
+#if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_OPTIMIZED_CODE)
+#  ifndef PNG_OPTIMIZED_CODE_SUPPORTED
+#    define PNG_OPTIMIZED_CODE_SUPPORTED
+#  endif
+#endif
+
 #if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_ASSEMBLER_CODE)
 #  ifndef PNG_ASSEMBLER_CODE_SUPPORTED
 #    define PNG_ASSEMBLER_CODE_SUPPORTED
 #  endif
-#  if defined(XP_MACOSX) && !defined(PNG_NO_MMX_CODE)
-     /* work around Intel-Mac compiler bug */
-#    define PNG_NO_MMX_CODE
-#  endif
-#  if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE) && \
-     defined(__MMX__)
+
+#  if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
 #    define PNG_MMX_CODE_SUPPORTED
 #  endif
-#  if !defined(PNG_USE_PNGGCCRD) && !defined(PNG_NO_MMX_CODE) && \
-     !defined(PNG_USE_PNGVCRD) && defined(__MMX__)
-#    define PNG_USE_PNGGCCRD
-#  endif
-#endif
 
-/* If you are sure that you don't need thread safety and you are compiling
-   with PNG_USE_PNGCCRD for an MMX application, you can define this for
-   faster execution.  See pnggccrd.c.
-#define PNG_THREAD_UNSAFE_OK
-*/
+#  if !defined(PNG_USE_PNGVCRD) && defined(PNG_MMX_CODE_SUPPORTED) && \
+     defined(_MSC_VER)
+#    define PNG_USE_PNGVCRD
+#  endif
+
+#  if !defined(PNG_USE_PNGGCCRD) && defined(PNG_MMX_CODE_SUPPORTED) && \
+     !defined(PNG_USE_PNGVCRD)
+#    define PNG_USE_PNGGCCRD
+     /* If you are sure that you don't need thread safety and you are compiling
+        with PNG_USE_PNGGCCRD for an MMX application, you can define this for
+        faster execution.  See pnggccrd.c.
+#    define PNG_THREAD_UNSAFE_OK
+     */
+#  endif
+
+#endif
 
 #if !defined(PNG_1_0_X)
 #if !defined(PNG_NO_USER_MEM) && !defined(PNG_USER_MEM_SUPPORTED)
@@ -1404,6 +1416,7 @@
 #  define NOCHECK 0
 #  define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK))
 #  define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK))
+#  define png_snprintf _fsnprintf   /* Added to v 1.2.19 */
 #  define png_strcpy  _fstrcpy
 #  define png_strncpy _fstrncpy   /* Added to v 1.2.6 */
 #  define png_strlen  _fstrlen
@@ -1413,6 +1426,27 @@
 #else /* use the usual functions */
 #  define CVT_PTR(ptr)         (ptr)
 #  define CVT_PTR_NOCHECK(ptr) (ptr)
+#  ifndef PNG_NO_SNPRINTF
+#    ifdef _MSC_VER
+#      define png_snprintf _snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 _snprintf
+#      define png_snprintf6 _snprintf
+#    else
+#      define png_snprintf snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 snprintf
+#      define png_snprintf6 snprintf
+#    endif
+#  else
+     /* You don't have or don't want to use snprintf().  Caution: Using
+      * sprintf instead of snprintf exposes your application to accidental
+      * or malevolent buffer overflows.  If you don't have snprintf(),
+      * as a general rule you should provide one (you can get one from
+      * Portable OpenSSH). */
+#    define png_snprintf(s1,n,fmt,x1) sprintf(s1,fmt,x1)
+#    define png_snprintf2(s1,n,fmt,x1,x2) sprintf(s1,fmt,x1,x2)
+#    define png_snprintf6(s1,n,fmt,x1,x2,x3,x4,x5,x6) \
+        sprintf(s1,fmt,x1,x2,x3,x4,x5,x6)
+#  endif
 #  define png_strcpy  strcpy
 #  define png_strncpy strncpy     /* Added to v 1.2.6 */
 #  define png_strlen  strlen
@@ -1434,39 +1468,48 @@
 /* Prior to libpng-1.0.9, this block was in pngasmrd.h */
 #if defined(PNG_INTERNAL)
 
-/* These are the default thresholds before the MMX code kicks in; if either
- * rowbytes or bitdepth is below the threshold, plain C code is used.  These
- * can be overridden at runtime via the png_set_mmx_thresholds() call in
- * libpng 1.2.0 and later.  The values below were chosen by Intel.
- */
-
-#ifndef PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT
-#  define PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT  128  /*  >=  */
-#endif
-#ifndef PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT
-#  define PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT  9    /*  >=  */   
-#endif
-
-/* Set this in the makefile for VC++ on Pentium, not here. */
-/* Platform must be Pentium.  Makefile must assemble and load pngvcrd.c .
- * MMX will be detected at run time and used if present.
- */
-#ifdef PNG_USE_PNGVCRD
-#  define PNG_HAVE_MMX_COMBINE_ROW
-#  define PNG_HAVE_MMX_READ_INTERLACE
-#  define PNG_HAVE_MMX_READ_FILTER_ROW
-#endif
-
-/* Set this in the makefile for gcc/as on Pentium, not here. */
-/* Platform must be Pentium.  Makefile must assemble and load pnggccrd.c .
- * MMX will be detected at run time and used if present.
- */
-#ifdef PNG_USE_PNGGCCRD
-#  define PNG_HAVE_MMX_COMBINE_ROW
-#  define PNG_HAVE_MMX_READ_INTERLACE
-#  define PNG_HAVE_MMX_READ_FILTER_ROW
-#endif
-/* - see pnggccrd.c for info about what is currently enabled */
+#if defined(PNG_USE_PNGGCCRD) || defined(PNG_USE_PNGVCRD)
+  /* Platform must be Pentium.  Makefile must assemble and load
+   * pnggccrd.c or  pngvcrd.c. MMX will be detected at run time and
+   * used if present.
+   */
+#  ifndef PNG_NO_MMX_COMBINE_ROW
+#    define PNG_HAVE_MMX_COMBINE_ROW
+#  endif
+#  ifndef PNG_NO_MMX_READ_INTERLACE
+#    define PNG_HAVE_MMX_READ_INTERLACE
+#  endif
+#  ifndef PNG_NO_MMX_READ_FILTER_ROW
+#    define PNG_HAVE_MMX_READ_FILTER_ROW
+#    ifndef PNG_NO_MMX_FILTER_SUB
+#      define PNG_MMX_READ_FILTER_SUB_SUPPORTED
+#    endif
+#    if !(defined(__GNUC__) && defined(__x86_64__) && (__GNUC__ < 4))
+       /* work around 64-bit gcc compiler bugs in gcc-3.x */
+#      ifndef PNG_NO_MMX_FILTER_UP
+#        define PNG_MMX_READ_FILTER_UP_SUPPORTED
+#      endif
+#      ifndef PNG_NO_MMX_FILTER_AVG
+#        define PNG_MMX_READ_FILTER_AVG_SUPPORTED
+#      endif
+#      ifndef PNG_NO_MMX_FILTER_PAETH
+#        define PNG_MMX_READ_FILTER_PAETH_SUPPORTED
+#      endif
+#    endif /* !(__GNUC__ && __x86_64__ && (__GNUC__ < 4)) */
+#  endif
+  /* These are the default thresholds before the MMX code kicks in; if either
+   * rowbytes or bitdepth is below the threshold, plain C code is used.  These
+   * can be overridden at runtime via the png_set_mmx_thresholds() call in
+   * libpng 1.2.0 and later.  The values below were chosen by Intel.
+   */
+#  ifndef PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT
+#    define PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT  128  /*  >=  */
+#  endif
+#  ifndef PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT
+#    define PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT  9    /*  >=  */   
+#  endif
+#endif /* PNG_USE_PNGGCCRD || PNG_USE_PNGVCRD */
+/* - see pngvcrd.c or pnggccrd.c for info about what is currently enabled */
 
 #endif /* PNG_INTERNAL */
 #endif /* PNG_READ_SUPPORTED */
diff --git a/pngerror.c b/pngerror.c
index f50f653..b0b366b 100644
--- a/pngerror.c
+++ b/pngerror.c
@@ -1,9 +1,9 @@
 
 /* pngerror.c - stub functions for i/o and memory allocation
  *
- * Last changed in libpng 1.2.13 November 13, 2006
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
  *
@@ -20,9 +20,11 @@
 static void /* PRIVATE */
 png_default_error PNGARG((png_structp png_ptr,
   png_const_charp error_message));
+#ifndef PNG_NO_WARNINGS
 static void /* PRIVATE */
 png_default_warning PNGARG((png_structp png_ptr,
   png_const_charp warning_message));
+#endif /* PNG_NO_WARNINGS */
 
 /* This function is called whenever there is a fatal error.  This function
  * should not be changed.  If there is a need to handle errors differently,
@@ -76,6 +78,7 @@
    png_default_error(png_ptr, error_message);
 }
 
+#ifndef PNG_NO_WARNINGS
 /* This function is called whenever there is a non-fatal error.  This function
  * should not be changed.  If there is a need to handle warnings differently,
  * you should supply a replacement warning function and use
@@ -105,6 +108,8 @@
    else
       png_default_warning(png_ptr, warning_message+offset);
 }
+#endif /* PNG_NO_WARNINGS */
+
 
 /* These utilities are used internally to build an error message that relates
  * to the current chunk.  The chunk name comes from png_ptr->chunk_name,
@@ -113,7 +118,7 @@
  * if the character is invalid.
  */
 #define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
-const static PNG_CONST char png_digit[16] = {
+static PNG_CONST char png_digit[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F'
 };
@@ -151,6 +156,7 @@
    }
 }
 
+#ifdef PNG_READ_SUPPORTED
 void PNGAPI
 png_chunk_error(png_structp png_ptr, png_const_charp error_message)
 {
@@ -164,6 +170,7 @@
    }
 }
 
+#ifndef PNG_NO_WARNINGS
 void PNGAPI
 png_chunk_warning(png_structp png_ptr, png_const_charp warning_message)
 {
@@ -176,6 +183,9 @@
      png_warning(png_ptr, msg);
    }
 }
+#endif /* PNG_NO_WARNINGS */
+
+#endif /* PNG_READ_SUPPORTED */
 
 /* This is the default error handling function.  Note that replacements for
  * this function MUST NOT RETURN, or the program will likely crash.  This
@@ -217,7 +227,7 @@
 #  ifdef USE_FAR_KEYWORD
    {
       jmp_buf jmpbuf;
-      png_memcpy(jmpbuf,png_ptr->jmpbuf,png_sizeof(jmp_buf));
+      png_memcpy(jmpbuf, png_ptr->jmpbuf, png_sizeof(jmp_buf));
       longjmp(jmpbuf, 1);
    }
 #  else
@@ -228,12 +238,11 @@
    PNG_ABORT();
 #endif
 #ifdef PNG_NO_CONSOLE_IO
-   /* make compiler happy */ ;
-   if (&error_message != NULL)
-      return;
+   error_message = error_message; /* make compiler happy */
 #endif
 }
 
+#ifndef PNG_NO_WARNINGS
 /* This function is called when there is a warning, but the library thinks
  * it can continue anyway.  Replacement functions don't have to do anything
  * here if you don't want them to.  In the default configuration, png_ptr is
@@ -267,14 +276,11 @@
 #  endif
      fprintf(stderr, "libpng warning: %s\n", warning_message);
 #else
-   /* make compiler happy */ ;
-   if (warning_message)
-     return;
+   warning_message = warning_message; /* make compiler happy */
 #endif
-   /* make compiler happy */ ;
-   if (png_ptr)
-      return;
+   png_ptr = png_ptr; /* make compiler happy */
 }
+#endif /* PNG_NO_WARNINGS */
 
 /* This function is called when the application wants to use another method
  * of handling errors and warnings.  Note that the error function MUST NOT
diff --git a/pnggccrd.c b/pnggccrd.c
index fab523c..a13e0a5 100644
--- a/pnggccrd.c
+++ b/pnggccrd.c
@@ -1,51 +1,50 @@
 
 /* pnggccrd.c - mixed C/assembler version of utilities to read a PNG file
  *
- * For Intel x86 CPU (Pentium-MMX or later) and GNU C compiler.
+ * For Intel/AMD x86 or x86-64 CPU (Pentium-MMX or later) and GNU C compiler.
  *
- *     See http://www.intel.com/drg/pentiumII/appnotes/916/916.htm
- *     and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
- *     for Intel's performance analysis of the MMX vs. non-MMX code.
- *
- * Last changed in libpng 1.2.15 January 5, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998 Intel Corporation
+ * Copyright (c) 1999-2002,2007 Greg Roelofs
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
- * Copyright (c) 1998, Intel Corporation
  *
  * Based on MSVC code contributed by Nirav Chhatrapati, Intel Corp., 1998.
  * Interface to libpng contributed by Gilles Vollant, 1999.
  * GNU C port by Greg Roelofs, 1999-2001.
  *
- * Lines 2350-4300 converted in place with intel2gas 1.3.1:
+ * References:
  *
- *   intel2gas -mdI pnggccrd.c.partially-msvc -o pnggccrd.c
+ *     http://www.intel.com/drg/pentiumII/appnotes/916/916.htm
+ *     http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
+ *       [Intel's performance analysis of the MMX vs. non-MMX code;
+ *        moved/deleted as of 2006, but text and some graphs still
+ *        available via WayBack Machine at archive.org]
+ *
+ *     http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html
+ *     http://sam.zoy.org/blog/2007-04-13-shlib-with-non-pic-code-have-inline-assembly-and-pic-mix-well
+ *     http://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
+ *     http://gcc.gnu.org/onlinedocs/gcc/Variable-Attributes.html
+ *     http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+ *     AMD64 Architecture Programmer's Manual, volumes 1 and 5
+ *       [http://www.amd.com/us-en/Processors/TechnicalResources/0,,30_182_739_7044,00.html]
+ *     Intel 64 and IA-32 Software Developer's Manuals
+ *       [http://developer.intel.com/products/processor/manuals/]
+ *
+ * png_read_filter_row_mmx_*() were converted in place with intel2gas 1.3.1:
+ *
+ *     intel2gas -mdI pnggccrd.c.partially-msvc -o pnggccrd.c
  *
  * and then cleaned up by hand.  See http://hermes.terminal.at/intel2gas/ .
  *
  * NOTE:  A sufficiently recent version of GNU as (or as.exe under DOS/Windows)
- *        is required to assemble the newer MMX instructions such as movq.
- *        For djgpp, see
- *
- *           ftp://ftp.simtel.net/pub/simtelnet/gnu/djgpp/v2gnu/bnu281b.zip
- *
- *        (or a later version in the same directory).  For Linux, check your
- *        distribution's web site(s) or try these links:
- *
- *           http://rufus.w3.org/linux/RPM/binutils.html
- *           http://www.debian.org/Packages/stable/devel/binutils.html
- *           ftp://ftp.slackware.com/pub/linux/slackware/slackware/slakware/d1/
- *             binutils.tgz
- *
- *        For other platforms, see the main GNU site:
- *
- *           ftp://ftp.gnu.org/pub/gnu/binutils/
- *
- *        Version 2.5.2l.15 is definitely too old...
+ * is required to assemble the newer asm instructions such as movq.  (Version
+ * 2.5.2l.15 is definitely too old.)  See ftp://ftp.gnu.org/pub/gnu/binutils/ .
  */
 
 /*
- * TEMPORARY PORTING NOTES AND CHANGELOG (mostly by Greg Roelofs)
- * =====================================
+ * PORTING NOTES AND CHANGELOG (mostly by Greg Roelofs)
+ * ===========================
  *
  * 19991006:
  *  - fixed sign error in post-MMX cleanup code (16- & 32-bit cases)
@@ -56,7 +55,7 @@
  *     - write MMX code for 48-bit case (pixel_bytes == 6)
  *     - figure out what's up with 24-bit case (pixel_bytes == 3):
  *        why subtract 8 from width_mmx in the pass 4/5 case?
- *        (only width_mmx case) (near line 1606)
+ *        (only width_mmx case) (near line 2335)
  *     x [DONE] replace pixel_bytes within each block with the true
  *        constant value (or are compilers smart enough to do that?)
  *     - rewrite all MMX interlacing code so it's aligned with
@@ -75,8 +74,8 @@
  *     inconsistent, and don't match the MMX Programmer's Reference
  *     Manual conventions anyway.  They should be changed to
  *     "b7 b6 b5 b4 b3 b2 b1 b0," where b0 indicates the byte that
- *     was lowest in memory (e.g., corresponding to a left pixel)
- *     and b7 is the byte that was highest (e.g., a right pixel).
+ *     was lowest in memory (i.e., corresponding to a left pixel)
+ *     and b7 is the byte that was highest (i.e., a right pixel).
  *
  * 19991016:
  *  - Brennan's Guide notwithstanding, gcc under Linux does *not*
@@ -88,6 +87,10 @@
  *     defined within the scope of a single function, but both
  *     static and truly global (multi-module) variables work fine.
  *
+ * 19991017:
+ *  - replaced pixel_bytes in each png_memcpy() call with constant value for
+ *     inlining (png_do_read_interlace() "non-MMX/modified C code" block)
+ *
  * 19991023:
  *  - fixed png_combine_row() non-MMX replication bug (odd passes only?)
  *  - switched from string-concatenation-with-macros to cleaner method of
@@ -221,48 +224,249 @@
  * 20010310:
  *  - fixed buffer-overrun bug in png_combine_row() C code (non-MMX)
  *
+ * 20010808:
+ *  - added PNG_THREAD_UNSAFE_OK around code using global variables [GR-P]
+ *
+ * 20011124:
+ *  - fixed missing save of Eflag in png_mmx_support() [Maxim Sobolev]
+ *
  * 20020304:
  *  - eliminated incorrect use of width_mmx in pixel_bytes == 8 case
  *
+ * 20020407:
+ *  - fixed insufficient preservation of ebx register [Sami Farin]
+ *
  * 20040724:
- *   - more tinkering with clobber list at lines 4529 and 5033, to get
- *     it to compile on gcc-3.4.
+ *  - more tinkering with clobber list at lines 4529 and 5033 to get it to
+ *     compile with gcc 3.4 [GR-P]
+ *
+ * 20040809:
+ *  - added "rim" definitions for CONST4 and CONST6 [GR-P]
+ *
+ * 20060303:
+ *  - added "OS2" to list of systems that don't need leading underscores [GR-P]
+ *
+ * 20060320:
+ *  - made PIC-compliant [Christian Aichinger]
+ *
+ * 20070313:
+ *  - finally applied Giuseppe Ghibò's 64-bit patch of 20060803 (completely
+ *     overlooked Dylan Alex Simon's similar patch of 20060414, oops...)
+ *
+ * 20070524:
+ *  - fixed link failure caused by asm-only variables being optimized out
+ *     (identified by Dimitri of Trolltech) with __attribute__((used)), which
+ *     also gets rid of warnings => nuked ugly png_squelch_warnings() hack
+ *  - dropped redundant ifdef
+ *  - moved png_mmx_support() back up where originally intended (as in
+ *     pngvcrd.c), using __attribute__((noinline)) in extra prototype
+ *
+ * 20070527:
+ *  - revised png_combine_row() to reuse mask in lieu of external _unmask
+ *  - moved 32-bit (RGBA) case to top of png_combine_row():  most common
+ *  - just about ready to give up on x86-64 -fPIC mode; can't even access 16
+ *     _mask*_* constants without triggering link error on shared library:
+ *       /usr/bin/ld: pnggccrd.pic.o: relocation R_X86_64_32S against `a local
+ *         symbol' can not be used when making a shared object; recompile with
+ *         -fPIC
+ *       pnggccrd.pic.o: could not read symbols: Bad value
+ *       ("objdump -x pnggccrd.pic.o | grep rodata" to verify)
+ *     [might be able to work around by doing within assembly code whatever
+ *     -fPIC does, but given problems to date, seems like long shot...]
+ *     [relevant ifdefs:  __x86_64__ && __PIC__ => C code only]
+ *  - changed #if 0 to #ifdef PNG_CLOBBER_MMX_REGS_SUPPORTED in case gcc ever
+ *     supports MMX regs (%mm0, etc.) in clobber list (not supported by gcc
+ *     2.7.2.3, 2.91.66 (egcs 1.1.2), 3.x, or 4.1.2)
+ *
+ * 20070603:
+ *  - revised png_combine_row() to use @GOTPCREL(%%rip) addressing on _c64
+ *     struct of _mask*_* constants for x86-64 -fPIC; see sam.zoy.org link
+ *     above for details
+ *  - moved _const4 and _const6 into _c64 struct, renamed to _amask5_3_0 and
+ *     _amask7_1_0, respectively
+ *  - can't figure out how to use _c64._mask*_* vars within asm code, so still
+ *     need single variables for non-x86-64/-fPIC half :-(
+ *  - replaced various __PIC__ ifdefs with *_GOT_ebx macros
+ *  - moved _LBCarryMask and _HBClearMask into _c64 struct
+ *  - conditionally replaced _p*temp variables with %r11d-%r13d (via p*_TEMP
+ *     and CLOBBER_r1*d macros)
+ *
+ * 20070604:
+ *  - replaced all _ActiveMask and _ActiveMaskEnd with new _amask*_*_* consts
+ *     (_amask naming convention:  numbers of 00-bytes, ff-bytes, 00-bytes)
+ *    - _ActiveMask     // (10) // avg/paeth/sub; read-only; consts; movq/pand
+ *       0x0000000000ffffffLL (bpp 3, avg)      _amask5_3_0
+ *       0xffffffffffffffffLL (bpp 4, 6, avg)   _amask0_8_0
+ *       0x000000000000ffffLL (bpp 2, avg)      _amask6_2_0
+ *       0x0000000000ffffffLL (bpp 3, paeth)    _amask5_3_0
+ *       0x00000000ffffffffLL (bpp 6, paeth)    _amask4_4_0
+ *       0x00000000ffffffffLL (bpp 4, paeth)    _amask4_4_0
+ *       0x00000000ffffffffLL (bpp 8, paeth)    _amask4_4_0
+ *       0x0000ffffff000000LL (bpp 3, sub)      _amask2_3_3
+ *       0x00000000ffff0000LL (bpp 2, sub)      _amask4_2_2
+ *    - _ActiveMaskEnd  // (1)  // paeth only; read-only; const; pand
+ *       0xffff000000000000LL (bpp 3, paeth)    _amask0_2_6
+ *  - changed all "#if defined(__x86_64__) // later // && defined(__PIC__)"
+ *     lines to "#ifdef PNG_x86_64_USE_GOTPCREL" for easier/safer testing
+ *
+ * 20070605:
+ *  - merged PNG_x86_64_USE_GOTPCREL, non-PNG_x86_64_USE_GOTPCREL code via
+ *     *MASK* and LOAD/RESTORE macros
+ *
+ * 20070607:
+ *  - replaced all constant instances of _ShiftBpp, _ShiftRem with immediates
+ *     (still have two shared cases in avg, sub routines)
+ *
+ * 20070609:
+ *  - replaced remaining instances of _ShiftBpp, _ShiftRem with immediates
+ *     (split sub and avg 4/6-bpp cases into separate blocks)
+ *  - fixed paeth bug due to clobbered r11/r12/r13 regs
+ *
+ * 20070610:
+ *  - made global "_dif" variable (avg/paeth/sub routines) local again (now
+ *     "diff"--see 19991120 entry above), using register constraints
+ *  - note that %ebp in clobber list doesn't actually work, at least for 32-bit
+ *     version and gcc 4.1.2; must save and restore manually.  (Seems to work
+ *     OK for 64-bit version and gcc 3.4.3, but gcc may not be using ebp/rbp
+ *     in that case.)
+ *  - started replacing direct _MMXLength accesses with register constraints
+ *
+ * 20070612:
+ *  - continued replacing direct _MMXLength accesses with register constraints
+ *
+ * 20070613:
+ *  - finished replacing direct _MMXLength accesses with register constraints;
+ *     switched to local variable (and renamed back to MMXLength)
+ *
+ * 20070614:
+ *  - fixed sub bpp = 1 bug
+ *  - started replacing direct _FullLength accesses with register constraints
+ *
+ * 20070615:
+ *  - fixed 64-bit paeth bpp 3 crash bug (misplaced LOAD_GOT_rbp)
+ *  - fixed 64-bit paeth bpp 1/2 and cleanup-block crash bugs (misplaced
+ *     RESTORE_r11_r12_r13)
+ *  - slightly optimized avg/paeth cleanup blocks and paeth bpp 1/2 block
+ *     (save/restore ebx only if needed)
+ *  - continued replacing direct _FullLength accesses with register constraints
+ *
+ * 20070616:
+ *  - finished replacing direct _FullLength accesses with register constraints
+ *     (*ugly* conditional clobber-separator macros for avg and paeth, sigh)
+ *
+ * 20070618:
+ *  - fixed misplaced PNG_THREAD_UNSAFE_OK endif (was missing LOAD_GOT_rbp/
+ *     RESTORE_rbp in 32-bit thread-safe case)
+ *  - changed all "ifdef *" to "if defined(*)" [GR-P]
+ *
+ * 20070619:
+ *  - rearranged most bitdepth-related case statements to put most frequent
+ *     cases at top (24-bit, 32-bit, 8-bit, rest)
+ *
+ * 20070623:
+ *  - cleaned up png_debug() warnings/formatting
+ *  - removed PNG_MMX_CODE_SUPPORTED ifdefs and added outer __GNUC__ ifdef
+ *     (module no longer used by non-x86/non-GCC builds as of libpng 1.2.19)
+ *  - removed single libpng-1.2.x PNG_DEBUG dependency on 1.0.x png_struct
+ *     member (row_buf_size)
+ *  - rearranged pass-related if-blocks in png_do_read_interlace() to put most
+ *     frequent cases (4, 5) at top [GR-P suggestion]
+ *
+ * 20070624-29:
+ *  - fixed 64-bit crash bug:  pointers -> rsi/rdi, not esi/edi (switched to
+ *     %0/%1/%2/%3/%4 notation; eliminated size suffixes from relevant add/
+ *     inc/sub/mov instructions; changed dummy vars to pointers)
+ *     - png_combine_row()
+ *     - png_do_read_interlace()
+ *     - png_read_filter_row_mmx_avg()
+ *     - png_read_filter_row_mmx_paeth()
+ *     - png_read_filter_row_mmx_sub()
+ *     - png_read_filter_row_mmx_up()
+ *  - NOTE:  this fix makes use of the fact that modifying a 32-bit reg (e.g.,
+ *     %%ebx) clears the top half of its corresponding 64-bit reg (%%rbx), so
+ *     it's safe to mix 32-bit operations with 64-bit base/index addressing
+ *     (see new PSI/PAX/PBX/PDX/PBP/etc. "pointer-register" macros); applies
+ *     also to clobber lists
+ *
+ * 20070630:
+ *  - cleaned up formatting, macros, minor png_read_filter_row_mmx_sub() 8-bpp
+ *     register-usage inefficiency
+ *  - fixed 32-bit png_do_read_interlace() bug (was using pointer size for
+ *     64-bit dummy values)
+ *
+ * 20070703:
+ *  - added check for (manual) PIC macro to fix OpenBSD crash bug
+ *
+ * 20070717:
+ *  - fixed 48-bit png_combine_row() bug (was acting like 32-bit):  copy 6
+ *     bytes per pixel, not 4, and use stride of 6, not 4, in the second loop
+ *     of interlace processing of 48-bit pixels [GR-P]
+ *
+ * 20070722:
+ *  - fixed 64-bit png_uint_32 bug with MMXLength/FullLength temp vars
+ *
+ * [still broken:  tops of all row-filter blocks (input/output constraints);
+ *  shows up on 64-bit dynamic (-fPIC) version with -O2, especially if debug-
+ *  printfs enabled, but at right edge of odd-width images even if disabled]
+ *
  *
  * STILL TO DO:
- *     - test png_do_read_interlace() 64-bit case (pixel_bytes == 8)
- *     - write MMX code for 48-bit case (pixel_bytes == 6)
- *     - figure out what's up with 24-bit case (pixel_bytes == 3):
- *        why subtract 8 from width_mmx in the pass 4/5 case?
- *        (only width_mmx case) (near line 1606)
- *     - rewrite all MMX interlacing code so it's aligned with beginning
- *        of the row buffer, not the end (see 19991007 for details)
- *     x pick one version of mmxsupport() and get rid of the other
- *     - add error messages to any remaining bogus default cases
- *     - enable pixel_depth == 8 cases in png_read_filter_row()? (test speed)
- *     x add support for runtime enable/disable/query of various MMX routines
+ *  - fix final thread-unsafe code using stack vars and pointer? (paeth top,
+ *     default, bottom only:  default, bottom already 5 reg constraints; could
+ *     replace bpp with pointer and group bpp/patemp/pbtemp/pctemp in array)
+ *  - fix ebp/no-reg-constraint inefficiency (avg/paeth/sub top)
+ *  - test png_do_read_interlace() 64-bit case (pixel_bytes == 8)
+ *  - write MMX code for 48-bit case (pixel_bytes == 6)
+ *  - figure out what's up with 24-bit case (pixel_bytes == 3):
+ *     why subtract 8 from width_mmx in the pass 4/5 case?  due to
+ *     odd number of bytes? (only width_mmx case) (near line 2335)
+ *  - rewrite all MMX interlacing code so it's aligned with beginning
+ *     of the row buffer, not the end (see 19991007 for details)
+ *  - add error messages to any remaining bogus default cases
+ *  - enable pixel_depth == 8 cases in png_read_filter_row()? (test speed)
+ *  - try =r, etc., as reg constraints?  (would gcc use 64-bit ones on x86-64?)
+ *  - need full, non-graphical, CRC-based test suite...  maybe autogenerate
+ *     random data of various height/width/depth, compute CRCs, write (C
+ *     funcs), read (asm/MMX), recompute CRCs, and compare?
+ *  - write true x86-64 version using 128-bit "media instructions", %xmm0-15,
+ *     and extra general-purpose registers
  */
 
+#if defined(__GNUC__)
+
 #define PNG_INTERNAL
 #include "png.h"
 
+
+/* for some inexplicable reason, gcc 3.3.5 on OpenBSD (and elsewhere?) does
+ * *not* define __PIC__ when the -fPIC option is used, so we have to rely on
+ * makefiles and whatnot to define the PIC macro explicitly */
+#if defined(PIC) && !defined(__PIC__)   // (this can/should move to pngconf.h)
+#  define __PIC__
+#endif
+
 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGGCCRD)
 
+/* if you want/need full thread-safety on x86-64 even when linking statically,
+ * comment out the "&& defined(__PIC__)" part here: */
+#if defined(__x86_64__) && defined(__PIC__)
+#  define PNG_x86_64_USE_GOTPCREL            // GOTPCREL => full thread-safety
+#  define PNG_CLOBBER_x86_64_REGS_SUPPORTED  // works as of gcc 3.4.3 ...
+#endif
+
 int PNGAPI png_mmx_support(void);
 
-#ifdef PNG_USE_LOCAL_ARRAYS
-const static int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-const static int FARDATA png_pass_inc[7]   = {8, 8, 4, 4, 2, 2, 1};
-const static int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
+#if defined(PNG_USE_LOCAL_ARRAYS)
+static PNG_CONST int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+static PNG_CONST int FARDATA png_pass_inc[7]   = {8, 8, 4, 4, 2, 2, 1};
+static PNG_CONST int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
 #endif
 
-#if defined(PNG_MMX_CODE_SUPPORTED)
 /* djgpp, Win32, Cygwin, and OS2 add their own underscores to global variables,
  * so define them without: */
 #if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__) || \
     defined(__OS2__)
 #  define _mmx_supported  mmx_supported
-#  define _const4         const4
-#  define _const6         const6
 #  define _mask8_0        mask8_0
 #  define _mask16_1       mask16_1
 #  define _mask16_0       mask16_0
@@ -279,109 +483,466 @@
 #  define _mask48_2       mask48_2
 #  define _mask48_1       mask48_1
 #  define _mask48_0       mask48_0
+#  define _amask5_3_0     amask5_3_0
+#  define _amask7_1_0     amask7_1_0
 #  define _LBCarryMask    LBCarryMask
 #  define _HBClearMask    HBClearMask
-#  define _ActiveMask     ActiveMask
-#  define _ActiveMask2    ActiveMask2
-#  define _ActiveMaskEnd  ActiveMaskEnd
-#  define _ShiftBpp       ShiftBpp
-#  define _ShiftRem       ShiftRem
-#ifdef PNG_THREAD_UNSAFE_OK
-#  define _unmask         unmask
-#  define _FullLength     FullLength
-#  define _MMXLength      MMXLength
-#  define _dif            dif
-#  define _patemp         patemp
-#  define _pbtemp         pbtemp
-#  define _pctemp         pctemp
+#  define _amask0_8_0     amask0_8_0
+#  define _amask6_2_0     amask6_2_0
+#  define _amask4_4_0     amask4_4_0
+#  define _amask0_2_6     amask0_2_6
+#  define _amask2_3_3     amask2_3_3
+#  define _amask4_2_2     amask4_2_2
+#  if defined(PNG_THREAD_UNSAFE_OK)
+#    define _patemp       patemp
+#    define _pbtemp       pbtemp
+#    define _pctemp       pctemp
+#  endif
+#endif // djgpp, Win32, Cygwin, OS2
+
+
+/* These constants are used in the inlined MMX assembly code. */
+
+typedef unsigned long long  ull;
+
+#if defined(PNG_x86_64_USE_GOTPCREL)
+static PNG_CONST struct {
+    //ull _mask_array[26];
+
+    // png_combine_row() constants:
+    ull _mask8_0;
+    ull _mask16_0, _mask16_1;
+    ull _mask24_0, _mask24_1, _mask24_2;
+    ull _mask32_0, _mask32_1, _mask32_2, _mask32_3;
+    ull _mask48_0, _mask48_1, _mask48_2, _mask48_3, _mask48_4, _mask48_5;
+
+    // png_do_read_interlace() constants:
+    ull _amask5_3_0, _amask7_1_0;  // was _const4 and _const6, respectively
+
+    // png_read_filter_row_mmx_avg() constants (also uses _amask5_3_0):
+    ull _LBCarryMask, _HBClearMask;
+    ull _amask0_8_0, _amask6_2_0;  // was ActiveMask for bpp 4/6 and 2 cases
+
+    // png_read_filter_row_mmx_paeth() constants (also uses _amask5_3_0):
+    ull _amask4_4_0, _amask0_2_6;  // was ActiveMask{,End} for bpp 6/4/8 and 3
+
+    // png_read_filter_row_mmx_sub() constants:
+    ull _amask2_3_3, _amask4_2_2;  // was ActiveMask for bpp 3 and 2 cases
+
+} _c64 __attribute__((used, aligned(8))) = {
+
+    // png_combine_row() constants:
+    0x0102040810204080LL, // _mask8_0      offset 0
+
+    0x1010202040408080LL, // _mask16_0     offset 8
+    0x0101020204040808LL, // _mask16_1     offset 16
+
+    0x2020404040808080LL, // _mask24_0     offset 24
+    0x0408080810101020LL, // _mask24_1     offset 32
+    0x0101010202020404LL, // _mask24_2     offset 40
+
+    0x4040404080808080LL, // _mask32_0     offset 48
+    0x1010101020202020LL, // _mask32_1     offset 56
+    0x0404040408080808LL, // _mask32_2     offset 64
+    0x0101010102020202LL, // _mask32_3     offset 72
+
+    0x4040808080808080LL, // _mask48_0     offset 80
+    0x2020202040404040LL, // _mask48_1     offset 88
+    0x1010101010102020LL, // _mask48_2     offset 96
+    0x0404080808080808LL, // _mask48_3     offset 104
+    0x0202020204040404LL, // _mask48_4     offset 112
+    0x0101010101010202LL, // _mask48_5     offset 120
+
+    // png_do_read_interlace() constants:
+    0x0000000000FFFFFFLL, // _amask5_3_0   offset 128  (bpp 3, avg/paeth) const4
+    0x00000000000000FFLL, // _amask7_1_0   offset 136                     const6
+
+    // png_read_filter_row_mmx_avg() constants:
+    0x0101010101010101LL, // _LBCarryMask  offset 144
+    0x7F7F7F7F7F7F7F7FLL, // _HBClearMask  offset 152
+    0xFFFFFFFFFFFFFFFFLL, // _amask0_8_0   offset 160  (bpp 4/6, avg)
+    0x000000000000FFFFLL, // _amask6_2_0   offset 168  (bpp 2,   avg)
+
+    // png_read_filter_row_mmx_paeth() constants:
+    0x00000000FFFFFFFFLL, // _amask4_4_0   offset 176  (bpp 6/4/8, paeth)
+    0xFFFF000000000000LL, // _amask0_2_6   offset 184  (bpp 3, paeth)   A.M.End
+
+    // png_read_filter_row_mmx_sub() constants:
+    0x0000FFFFFF000000LL, // _amask2_3_3   offset 192  (bpp 3, sub)
+    0x00000000FFFF0000LL, // _amask4_2_2   offset 200  (bpp 2, sub)
+
+};
+
+#define MASK8_0        "(%%rbp)"
+#define MASK16_0       "8(%%rbp)"
+#define MASK16_1       "16(%%rbp)"
+#define MASK24_0       "24(%%rbp)"
+#define MASK24_1       "32(%%rbp)"
+#define MASK24_2       "40(%%rbp)"
+#define MASK32_0       "48(%%rbp)"
+#define MASK32_1       "56(%%rbp)"
+#define MASK32_2       "64(%%rbp)"
+#define MASK32_3       "72(%%rbp)"
+#define MASK48_0       "80(%%rbp)"
+#define MASK48_1       "88(%%rbp)"
+#define MASK48_2       "96(%%rbp)"
+#define MASK48_3       "104(%%rbp)"
+#define MASK48_4       "112(%%rbp)"
+#define MASK48_5       "120(%%rbp)"
+#define AMASK5_3_0     "128(%%rbp)"
+#define AMASK7_1_0     "136(%%rbp)"
+#define LB_CARRY_MASK  "144(%%rbp)"
+#define HB_CLEAR_MASK  "152(%%rbp)"
+#define AMASK0_8_0     "160(%%rbp)"
+#define AMASK6_2_0     "168(%%rbp)"
+#define AMASK4_4_0     "176(%%rbp)"
+#define AMASK0_2_6     "184(%%rbp)"
+#define AMASK2_3_3     "192(%%rbp)"
+#define AMASK4_2_2     "200(%%rbp)"
+
+#else // !PNG_x86_64_USE_GOTPCREL
+
+static PNG_CONST ull _mask8_0  __attribute__((used, aligned(8))) = 0x0102040810204080LL;
+
+static PNG_CONST ull _mask16_1 __attribute__((used, aligned(8))) = 0x0101020204040808LL;
+static PNG_CONST ull _mask16_0 __attribute__((used, aligned(8))) = 0x1010202040408080LL;
+
+static PNG_CONST ull _mask24_2 __attribute__((used, aligned(8))) = 0x0101010202020404LL;
+static PNG_CONST ull _mask24_1 __attribute__((used, aligned(8))) = 0x0408080810101020LL;
+static PNG_CONST ull _mask24_0 __attribute__((used, aligned(8))) = 0x2020404040808080LL;
+
+static PNG_CONST ull _mask32_3 __attribute__((used, aligned(8))) = 0x0101010102020202LL;
+static PNG_CONST ull _mask32_2 __attribute__((used, aligned(8))) = 0x0404040408080808LL;
+static PNG_CONST ull _mask32_1 __attribute__((used, aligned(8))) = 0x1010101020202020LL;
+static PNG_CONST ull _mask32_0 __attribute__((used, aligned(8))) = 0x4040404080808080LL;
+
+static PNG_CONST ull _mask48_5 __attribute__((used, aligned(8))) = 0x0101010101010202LL;
+static PNG_CONST ull _mask48_4 __attribute__((used, aligned(8))) = 0x0202020204040404LL;
+static PNG_CONST ull _mask48_3 __attribute__((used, aligned(8))) = 0x0404080808080808LL;
+static PNG_CONST ull _mask48_2 __attribute__((used, aligned(8))) = 0x1010101010102020LL;
+static PNG_CONST ull _mask48_1 __attribute__((used, aligned(8))) = 0x2020202040404040LL;
+static PNG_CONST ull _mask48_0 __attribute__((used, aligned(8))) = 0x4040808080808080LL;
+
+// png_do_read_interlace() constants (referenced from asm strings via AMASK5_3_0/AMASK7_1_0, so marked "used" like the other masks):
+static PNG_CONST ull _amask5_3_0  __attribute__((used, aligned(8))) = 0x0000000000FFFFFFLL;  // was _const4
+static PNG_CONST ull _amask7_1_0  __attribute__((used, aligned(8))) = 0x00000000000000FFLL;  // was _const6
+
+// png_read_filter_row_mmx_avg() constants:
+static PNG_CONST ull _LBCarryMask __attribute__((used, aligned(8))) = 0x0101010101010101LL;
+static PNG_CONST ull _HBClearMask __attribute__((used, aligned(8))) = 0x7f7f7f7f7f7f7f7fLL;
+static PNG_CONST ull _amask0_8_0  __attribute__((used, aligned(8))) = 0xFFFFFFFFFFFFFFFFLL;
+static PNG_CONST ull _amask6_2_0  __attribute__((used, aligned(8))) = 0x000000000000FFFFLL;
+
+// png_read_filter_row_mmx_paeth() constants:
+static PNG_CONST ull _amask4_4_0  __attribute__((used, aligned(8))) = 0x00000000FFFFFFFFLL;
+static PNG_CONST ull _amask0_2_6  __attribute__((used, aligned(8))) = 0xFFFF000000000000LL;
+
+// png_read_filter_row_mmx_sub() constants:
+static PNG_CONST ull _amask2_3_3  __attribute__((used, aligned(8))) = 0x0000FFFFFF000000LL;
+static PNG_CONST ull _amask4_2_2  __attribute__((used, aligned(8))) = 0x00000000FFFF0000LL;
+
+#define MASK8_0        "_mask8_0"
+#define MASK16_0       "_mask16_0"
+#define MASK16_1       "_mask16_1"
+#define MASK24_0       "_mask24_0"
+#define MASK24_1       "_mask24_1"
+#define MASK24_2       "_mask24_2"
+#define MASK32_0       "_mask32_0"
+#define MASK32_1       "_mask32_1"
+#define MASK32_2       "_mask32_2"
+#define MASK32_3       "_mask32_3"
+#define MASK48_0       "_mask48_0"
+#define MASK48_1       "_mask48_1"
+#define MASK48_2       "_mask48_2"
+#define MASK48_3       "_mask48_3"
+#define MASK48_4       "_mask48_4"
+#define MASK48_5       "_mask48_5"
+#define AMASK5_3_0     "_amask5_3_0"
+#define AMASK7_1_0     "_amask7_1_0"
+#define LB_CARRY_MASK  "_LBCarryMask"
+#define HB_CLEAR_MASK  "_HBClearMask"
+#define AMASK0_8_0     "_amask0_8_0"
+#define AMASK6_2_0     "_amask6_2_0"
+#define AMASK4_4_0     "_amask4_4_0"
+#define AMASK0_2_6     "_amask0_2_6"
+#define AMASK2_3_3     "_amask2_3_3"
+#define AMASK4_2_2     "_amask4_2_2"
+
+#endif // ?PNG_x86_64_USE_GOTPCREL
+
+
+#if defined(PNG_HAVE_MMX_READ_FILTER_ROW) || defined(PNG_HAVE_MMX_COMBINE_ROW)
+
+// this block is specific to png_read_filter_row_mmx_paeth() except for
+// LOAD_GOT_rbp and RESTORE_rbp, which are also used in png_combine_row()
+#if defined(PNG_x86_64_USE_GOTPCREL)
+#  define pa_TEMP                "%%r11d"
+#  define pb_TEMP                "%%r12d"
+#  define pc_TEMP                "%%r13d"
+#  if defined(PNG_CLOBBER_x86_64_REGS_SUPPORTED)  // works as of gcc 3.4.3 ...
+#    define SAVE_r11_r12_r13
+#    define RESTORE_r11_r12_r13
+#    define _CLOBBER_r11_r12_r13 ,"%r11", "%r12", "%r13"
+#    define CLOBBER_r11_r12_r13  "%r11", "%r12", "%r13"
+#  else // !PNG_CLOBBER_x86_64_REGS_SUPPORTED
+#    define SAVE_r11_r12_r13     "pushq %%r11  \n\t" \
+                                 "pushq %%r12  \n\t" \
+                                 "pushq %%r13  \n\t"  // "normally 0-extended"
+#    define RESTORE_r11_r12_r13  "popq  %%r13  \n\t" \
+                                 "popq  %%r12  \n\t" \
+                                 "popq  %%r11  \n\t"
+#    define _CLOBBER_r11_r12_r13
+#    define CLOBBER_r11_r12_r13
+#  endif
+#  define LOAD_GOT_rbp           "pushq %%rbp                        \n\t" \
+                                 "movq  _c64@GOTPCREL(%%rip), %%rbp  \n\t"  // rbp = address of _c64 (base for the "N(%%rbp)" mask operands)
+#  define RESTORE_rbp            "popq  %%rbp                        \n\t"  // NOTE(review): pushq/popq inside asm can overwrite the x86-64 red zone of a leaf function -- confirm callers are safe
+#else // 32-bit and/or non-PIC
+#  if defined(PNG_THREAD_UNSAFE_OK)
+     // These variables are used in png_read_filter_row_mmx_paeth() and would be
+     //   local variables if not for gcc-inline-assembly addressing limitations
+     //   (some apparently related to ELF format, others to CPU type).
+     //
+     // WARNING: Their presence defeats the thread-safety of libpng.
+     static int                     _patemp  __attribute__((used));
+     static int                     _pbtemp  __attribute__((used));
+     static int                     _pctemp  __attribute__((used));
+#    define pa_TEMP                "_patemp"
+#    define pb_TEMP                "_pbtemp"  // temp variables for
+#    define pc_TEMP                "_pctemp"  //  Paeth routine
+#    define SAVE_r11_r12_r13
+#    define RESTORE_r11_r12_r13
+#    define _CLOBBER_r11_r12_r13   // not using regs => not clobbering
+#    define CLOBBER_r11_r12_r13
+#  endif // PNG_THREAD_UNSAFE_OK
+#  define LOAD_GOT_rbp
+#  define RESTORE_rbp
 #endif
+
+#if defined(__x86_64__)
+#  define SAVE_ebp
+#  define RESTORE_ebp
+#  define _CLOBBER_ebp         ,"%ebp"
+#  define CLOBBER_ebp          "%ebp"
+#  define SAVE_FullLength      "movl %%eax, %%r15d  \n\t"
+#  define RESTORE_FullLength   "movl %%r15d, "     // + dest operand:  eax or ecx
+#  if defined(PNG_CLOBBER_x86_64_REGS_SUPPORTED)   // works as of gcc 3.4.3 ...
+#    define SAVE_r15
+#    define RESTORE_r15
+#    define _CLOBBER_r15       ,"%r15"
+#  else
+#    define SAVE_r15           "pushq %%r15  \n\t"
+#    define RESTORE_r15        "popq  %%r15  \n\t"
+#    define _CLOBBER_r15
+#  endif
+#  define PBP                  "%%rbp"             // regs used for 64-bit
+#  define PAX                  "%%rax"             //  pointers or in
+#  define PBX                  "%%rbx"             //  combination with
+#  define PCX                  "%%rcx"             //  64-bit pointer-regs
+#  define PDX                  "%%rdx"             //  (base/index pairs,
+#  define PSI                  "%%rsi"             //  add/sub/mov pairs)
+#  define CLEAR_BOTTOM_3_BITS  "and  $0xfffffffffffffff8, "  // + dest operand
+#else
+#  define SAVE_ebp             "pushl %%ebp \n\t"  // clobber list doesn't work
+#  define RESTORE_ebp          "popl  %%ebp \n\t"  //  for %ebp on 32-bit; not
+#  define _CLOBBER_ebp                             //  clear why not
+#  define CLOBBER_ebp
+#  define SAVE_FullLength      "pushl %%eax \n\t"
+#  define RESTORE_FullLength   "popl "             // + eax (avg) or ecx (paeth)
+#  define SAVE_r15                                 // r15 not used on 32-bit:
+#  define RESTORE_r15                              //  FullLength is pushed instead
+#  define _CLOBBER_r15
+#  define PBP                  "%%ebp"             // regs used for or in
+#  define PAX                  "%%eax"             //  combination with
+#  define PBX                  "%%ebx"             //  "normal," 32-bit
+#  define PCX                  "%%ecx"             //  pointers
+#  define PDX                  "%%edx"
+#  define PSI                  "%%esi"
+#  define CLEAR_BOTTOM_3_BITS  "and  $0xfffffff8, "  // + dest operand
+#endif
+
+// CLOB_COMMA_ebx_ebp:  need comma ONLY if both CLOBBER_ebp and CLOBBER_GOT_ebx
+//                      have values, i.e., only if __x86_64__ AND !__PIC__
+#if defined(__x86_64__) && !defined(__PIC__)
+#  define CLOB_COMMA_ebx_ebp    , // clobbering both ebp and ebx => need comma
+#else
+#  define CLOB_COMMA_ebx_ebp      // ebp, ebx not both listed => no comma
+#endif
+
+// CLOB_COMMA_ebX_r1X:  need comma UNLESS both CLOBBER_ebp and CLOBBER_GOT_ebx
+//                   are empty OR CLOBBER_r11_r12_r13 is empty--i.e., NO comma
+//                   if (!__x86_64__ AND __PIC__) OR !(PNG_x86_64_USE_GOTPCREL
+//                   AND PNG_CLOBBER_x86_64_REGS_SUPPORTED)   (double sigh...)
+#if (!defined(__x86_64__) && defined(__PIC__)) || \
+    !defined(PNG_x86_64_USE_GOTPCREL) || \
+    !defined(PNG_CLOBBER_x86_64_REGS_SUPPORTED)
+#  define CLOB_COMMA_ebX_r1X      // one side is empty => no comma
+#else
+#  define CLOB_COMMA_ebX_r1X    , // clobbering (ebp OR ebx) AND r11_r12_r13
+#endif
+
+// CLOB_COLON_ebx_ebp:  need colon unless CLOBBER_ebp and CLOBBER_GOT_ebx are
+//                      BOTH empty--i.e., NO colon if (!__x86_64__ AND __PIC__)
+// CLOB_COLON_ebx_ebp_r1X:  if, in addition, CLOBBER_r11_r12_r13 is empty, then
+//                          no colon for Paeth blocks, either--i.e., NO colon
+//                          if !(PNG_x86_64_USE_GOTPCREL AND
+//                               PNG_CLOBBER_x86_64_REGS_SUPPORTED)
+#if (!defined(__x86_64__) && defined(__PIC__))
+#  define CLOB_COLON_ebx_ebp          // neither ebp nor ebx is clobbered
+#  if !(defined(PNG_x86_64_USE_GOTPCREL) && \
+        defined(PNG_CLOBBER_x86_64_REGS_SUPPORTED))
+#    define CLOB_COLON_ebx_ebp_r1X    // ...nor is r11_r12_r13
+#  else
+#    define CLOB_COLON_ebx_ebp_r1X  : // clobbering ebp OR ebx OR r11_r12_r13
+#  endif
+#else
+#  define CLOB_COLON_ebx_ebp        : // clobbering ebp OR ebx
+#  define CLOB_COLON_ebx_ebp_r1X    : // clobbering ebp OR ebx OR r11_r12_r13
+#endif
+
+#endif // PNG_HAVE_MMX_READ_FILTER_ROW
+
+#if defined(__PIC__)  // macros to save, restore index to Global Offset Table
+#  if defined(__x86_64__)
+#    define SAVE_GOT_ebx     "pushq %%rbx \n\t"
+#    define RESTORE_GOT_ebx  "popq  %%rbx \n\t"
+#  else
+#    define SAVE_GOT_ebx     "pushl %%ebx \n\t"
+#    define RESTORE_GOT_ebx  "popl  %%ebx \n\t"
+#  endif
+#  define _CLOBBER_GOT_ebx   // explicitly saved, restored => not clobbered
+#  define CLOBBER_GOT_ebx
+#else                        // non-PIC:  no push/pop; ebx is listed as clobbered
+#  define SAVE_GOT_ebx
+#  define RESTORE_GOT_ebx
+#  define _CLOBBER_GOT_ebx   ,"%ebx"   // leading-comma form (appends to a list)
+#  define CLOBBER_GOT_ebx    "%ebx"    // bare form
+#endif
+
+#if defined(PNG_HAVE_MMX_COMBINE_ROW) || defined(PNG_HAVE_MMX_READ_INTERLACE)
+#  define BPP2  2  // bytes per pixel (a.k.a. pixel_bytes); these names are
+#  define BPP3  3  //  defined only to help avoid cut-and-paste errors
+#  define BPP4  4
+#  define BPP6  6
+#  define BPP8  8
 #endif
 
 
-/* These constants are used in the inlined MMX assembly code.
-   Ignore gcc's "At top level: defined but not used" warnings. */
 
-/* GRR 20000706:  originally _unmask was needed only when compiling with -fPIC,
- *  since that case uses the %ebx register for indexing the Global Offset Table
- *  and there were no other registers available.  But gcc 2.95 and later emit
- *  "more than 10 operands in `asm'" errors when %ebx is used to preload unmask
- *  in the non-PIC case, so we'll just use the global unconditionally now.
- */
-#ifdef PNG_THREAD_UNSAFE_OK
-static int _unmask;
-#endif
+static int _mmx_supported = 2; // 0: no MMX; 1: MMX supported; 2: not tested
 
-const static unsigned long long _mask8_0  = 0x0102040810204080LL;
+/*===========================================================================*/
+/*                                                                           */
+/*                      P N G _ M M X _ S U P P O R T                        */
+/*                                                                           */
+/*===========================================================================*/
 
-const static unsigned long long _mask16_1 = 0x0101020204040808LL;
-const static unsigned long long _mask16_0 = 0x1010202040408080LL;
+// GRR NOTES:  (1) the following code assumes 386 or better (pushfl/popfl)
+//             (2) all instructions compile with gcc 2.7.2.3 and later
+//             (3) [obsolete] the function used to sit at the end of the file
+//                  to keep gcc from inlining it in multiple places and then
+//                  failing because the ".NOT_SUPPORTED" label was multiply
+//                  defined  [need to retest with gcc 2.7.2.3]
 
-const static unsigned long long _mask24_2 = 0x0101010202020404LL;
-const static unsigned long long _mask24_1 = 0x0408080810101020LL;
-const static unsigned long long _mask24_0 = 0x2020404040808080LL;
+// GRR 20070524:  This declaration apparently is compatible with but supersedes
+//   the one in png.h; in any case, the generated object file is slightly
+//   smaller.  It is unnecessary with gcc 4.1.2, but gcc 2.x apparently
+//   replicated the ".NOT_SUPPORTED" label in each location the function was
+//   inlined, leading to "multiply defined" label errors at compile time.
+//   The old workaround was to leave the function at the end of this file;
+//   the new one (still being tested) is the gcc-specific "noinline" function
+//   attribute below, which prevents local inlining.
+int PNGAPI
+png_mmx_support(void) __attribute__((noinline));
 
-const static unsigned long long _mask32_3 = 0x0101010102020202LL;
-const static unsigned long long _mask32_2 = 0x0404040408080808LL;
-const static unsigned long long _mask32_1 = 0x1010101020202020LL;
-const static unsigned long long _mask32_0 = 0x4040404080808080LL;
-
-const static unsigned long long _mask48_5 = 0x0101010101010202LL;
-const static unsigned long long _mask48_4 = 0x0202020204040404LL;
-const static unsigned long long _mask48_3 = 0x0404080808080808LL;
-const static unsigned long long _mask48_2 = 0x1010101010102020LL;
-const static unsigned long long _mask48_1 = 0x2020202040404040LL;
-const static unsigned long long _mask48_0 = 0x4040808080808080LL;
-
-const static unsigned long long _const4   = 0x0000000000FFFFFFLL;
-//const static unsigned long long _const5 = 0x000000FFFFFF0000LL;     // NOT USED
-const static unsigned long long _const6   = 0x00000000000000FFLL;
-
-// These are used in the row-filter routines and should/would be local
-//  variables if not for gcc addressing limitations.
-// WARNING: Their presence probably defeats the thread safety of libpng.
-
-#ifdef PNG_THREAD_UNSAFE_OK
-static png_uint_32  _FullLength;
-static png_uint_32  _MMXLength;
-static int          _dif;
-static int          _patemp; // temp variables for Paeth routine
-static int          _pbtemp;
-static int          _pctemp;
-#endif
-
-void /* PRIVATE */
-png_squelch_warnings(void)
+int PNGAPI
+png_mmx_support(void)
 {
-#ifdef PNG_THREAD_UNSAFE_OK
-   _dif = _dif;
-   _patemp = _patemp;
-   _pbtemp = _pbtemp;
-   _pctemp = _pctemp;
-   _MMXLength = _MMXLength;
+#if defined(PNG_MMX_CODE_SUPPORTED)  // superfluous, but what the heck
+    int result;                      // set from eax:  1 = MMX, 0 = no MMX
+    __asm__ __volatile__ (
+#if defined(__x86_64__)
+        "pushq %%rbx          \n\t"  // rbx gets clobbered by CPUID instruction
+        "pushq %%rcx          \n\t"  // so does rcx...
+        "pushq %%rdx          \n\t"  // ...and rdx (but rcx & rdx safe on Linux)
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack into rax
+        "movq %%rax, %%rcx    \n\t"  // make another copy of Eflag in rcx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushq %%rax          \n\t"  // save modified Eflag back to stack
+        "popfq                \n\t"  // restore modified value to Eflag reg
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack
+        "pushq %%rcx          \n\t"  // save original Eflag to stack
+        "popfq                \n\t"  // restore original Eflag
+#else
+        "pushl %%ebx          \n\t"  // ebx gets clobbered by CPUID instruction
+        "pushl %%ecx          \n\t"  // so does ecx...
+        "pushl %%edx          \n\t"  // ...and edx (but ecx & edx safe on Linux)
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack into eax
+        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushl %%eax          \n\t"  // save modified Eflag back to stack
+        "popfl                \n\t"  // restore modified value to Eflag reg
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack
+        "pushl %%ecx          \n\t"  // save original Eflag to stack
+        "popfl                \n\t"  // restore original Eflag
 #endif
-   _const4  = _const4;
-   _const6  = _const6;
-   _mask8_0  = _mask8_0;
-   _mask16_1 = _mask16_1;
-   _mask16_0 = _mask16_0;
-   _mask24_2 = _mask24_2;
-   _mask24_1 = _mask24_1;
-   _mask24_0 = _mask24_0;
-   _mask32_3 = _mask32_3;
-   _mask32_2 = _mask32_2;
-   _mask32_1 = _mask32_1;
-   _mask32_0 = _mask32_0;
-   _mask48_5 = _mask48_5;
-   _mask48_4 = _mask48_4;
-   _mask48_3 = _mask48_3;
-   _mask48_2 = _mask48_2;
-   _mask48_1 = _mask48_1;
-   _mask48_0 = _mask48_0;
-}
+        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
+        "jz 0f                \n\t"  // if same, CPUID instr. is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero
+//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
+        "cpuid                \n\t"  // get the CPU identification info
+        "cmpl $1, %%eax       \n\t"  // make sure eax returned non-zero value
+        "jl 0f                \n\t"  // if eax is zero, MMX is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
+        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
+                                     // faster than the instruction "mov eax, 1"
+        "cpuid                \n\t"  // get the CPU identification info again
+        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
+        "jz 0f                \n\t"  // non-zero = yes, MMX IS supported
+
+        "movl $1, %%eax       \n\t"  // set return value to 1
+        "jmp  1f              \n\t"  // DONE:  have MMX support
+
+    "0:                       \n\t"  // .NOT_SUPPORTED: target label for jump instructions
+        "movl $0, %%eax       \n\t"  // set return value to 0
+    "1:                       \n\t"  // .RETURN: target label for jump instructions
+#if defined(__x86_64__)
+        "popq %%rdx           \n\t"  // restore rdx
+        "popq %%rcx           \n\t"  // restore rcx
+        "popq %%rbx           \n\t"  // restore rbx
+#else
+        "popl %%edx           \n\t"  // restore edx
+        "popl %%ecx           \n\t"  // restore ecx
+        "popl %%ebx           \n\t"  // restore ebx
+#endif
+
+//      (no "ret" here:  fall through to the C code below, which stores eax)
+// NOTE(review):  on x86-64 the pushes above write below %rsp inside the asm;
+//      confirm that the compiler keeps nothing live there (ABI "red zone")
+        : "=a" (result)              // output list
+
+        :                            // any variables used on input (none)
+
+                                     // no clobber list
+//      , "%ebx", "%ecx", "%edx"     // GRR:  we handle these manually
+//      , "memory"   // if write to a variable gcc thought was in a reg
+//      , "cc"       // "condition codes" (flag bits)
+    );
+    _mmx_supported = result;         // replaces the initial 2 ("not tested")
+#else
+    _mmx_supported = 0;
 #endif /* PNG_MMX_CODE_SUPPORTED */
 
+    return _mmx_supported;
+}
 
-static int _mmx_supported = 2;
 
 /*===========================================================================*/
 /*                                                                           */
@@ -391,12 +952,6 @@
 
 #if defined(PNG_HAVE_MMX_COMBINE_ROW)
 
-#define BPP2  2
-#define BPP3  3 /* bytes per pixel (a.k.a. pixel_bytes) */
-#define BPP4  4
-#define BPP6  6 /* (defined only to help avoid cut-and-paste errors) */
-#define BPP8  8
-
 /* Combines the row recently read in with the previous row.
    This routine takes care of alpha and transparency if requested.
    This routine also handles the two methods of progressive display
@@ -414,9 +969,14 @@
 void /* PRIVATE */
 png_combine_row(png_structp png_ptr, png_bytep row, int mask)
 {
+   int dummy_value_a;    // fix 'forbidden register spilled' error
+   int dummy_value_c;
+   int dummy_value_d;
+   png_bytep dummy_value_S;
+   png_bytep dummy_value_D;
+
    png_debug(1, "in png_combine_row (pnggccrd.c)\n");
 
-#if defined(PNG_MMX_CODE_SUPPORTED)
    if (_mmx_supported == 2) {
 #if !defined(PNG_1_0_X)
        /* this should have happened in png_init_mmx_flags() already */
@@ -424,7 +984,6 @@
 #endif
        png_mmx_support();
    }
-#endif
 
    if (mask == 0xff)
    {
@@ -436,6 +995,478 @@
    {
       switch (png_ptr->row_info.pixel_depth)
       {
+         case 24:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+#if !defined(PNG_1_0_X)
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
+#else
+            if (_mmx_supported)
+#endif
+            {
+               png_uint_32 len;
+               int diff;
+
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width & ~7;          // reduce to multiple of 8
+               diff = (int) (png_ptr->width & 7);   // amount lost
+
+               __asm__ __volatile__ (
+                  "not       %%edx            \n\t" // mask => unmask
+                  "movd      %%edx, %%mm7     \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // unmask => mask for later
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  LOAD_GOT_rbp
+                  "movq   " MASK24_0 ", %%mm0 \n\t" // _mask24_0 -> mm0
+                  "movq   " MASK24_1 ", %%mm1 \n\t" // _mask24_1 -> mm1
+                  "movq   " MASK24_2 ", %%mm2 \n\t" // _mask24_2 -> mm2
+                  RESTORE_rbp
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+                  "pand      %%mm7, %%mm2     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+                  "pcmpeqb   %%mm6, %%mm2     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %3       \n\t" // load source
+// preload        "movl      dstptr, %4       \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        mainloop24end    \n\t"
+
+                "mainloop24:                  \n\t"
+                  "movq      (%3), %%mm4      \n\t"
+                  "pand      %%mm0, %%mm4     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "movq      (%4), %%mm7      \n\t"
+                  "pandn     %%mm7, %%mm6     \n\t"
+                  "por       %%mm6, %%mm4     \n\t"
+                  "movq      %%mm4, (%4)      \n\t"
+
+                  "movq      8(%3), %%mm5     \n\t"
+                  "pand      %%mm1, %%mm5     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "movq      8(%4), %%mm6     \n\t"
+                  "pandn     %%mm6, %%mm7     \n\t"
+                  "por       %%mm7, %%mm5     \n\t"
+                  "movq      %%mm5, 8(%4)     \n\t"
+
+                  "movq      16(%3), %%mm6    \n\t"
+                  "pand      %%mm2, %%mm6     \n\t"
+                  "movq      %%mm2, %%mm4     \n\t"
+                  "movq      16(%4), %%mm7    \n\t"
+                  "pandn     %%mm7, %%mm4     \n\t"
+                  "por       %%mm4, %%mm6     \n\t"
+                  "movq      %%mm6, 16(%4)    \n\t"
+
+                  "add       $24, %3          \n\t" // inc by 24 bytes processed
+                  "add       $24, %4          \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+
+                  "ja        mainloop24       \n\t"
+
+                "mainloop24end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end24            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // make low byte, high byte
+
+                "secondloop24:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip24           \n\t" // if CF = 0
+                  "movw      (%3), %%ax       \n\t"
+                  "movw      %%ax, (%4)       \n\t"
+                  "xorl      %%eax, %%eax     \n\t"
+                  "movb      2(%3), %%al      \n\t"
+                  "movb      %%al, 2(%4)      \n\t"
+
+                "skip24:                      \n\t"
+                  "add       $3, %3           \n\t"
+                  "add       $3, %4           \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop24     \n\t"
+
+                "end24:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  : "=a" (dummy_value_a),           // output regs (dummy)
+                    "=d" (dummy_value_d),
+                    "=c" (dummy_value_c),
+                    "=S" (dummy_value_S),
+                    "=D" (dummy_value_D)
+
+                  : "0" (diff),        // eax       // input regs
+                    "1" (mask),        // edx
+                    "2" (len),         // ecx
+// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                    "3" (srcptr),      // esi/rsi
+                    "4" (dstptr)       // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                  : "%mm0", "%mm1", "%mm2"          // clobber list
+                  , "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* not _mmx_supported - use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP3 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP3 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP3;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            } /* end of else (_mmx_supported) */
+
+            break;
+         }       /* end 24 bpp */
+
+         // formerly claimed to be most common case (combining 32-bit RGBA),
+         // but almost certainly less common than 24-bit RGB case
+         case 32:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+#if !defined(PNG_1_0_X)
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
+#else
+            if (_mmx_supported)
+#endif
+            {
+               png_uint_32 len;
+               int diff;
+
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width & ~7;          // reduce to multiple of 8
+               diff = (int) (png_ptr->width & 7);   // amount lost
+
+               __asm__ __volatile__ (
+                  "not       %%edx            \n\t" // mask => unmask
+                  "movd      %%edx, %%mm7     \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // unmask => mask for later
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t"
+                  "punpcklwd %%mm7, %%mm7     \n\t"
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  LOAD_GOT_rbp
+                  "movq   " MASK32_0 ", %%mm0 \n\t" // _mask32_0
+                  "movq   " MASK32_1 ", %%mm1 \n\t" // _mask32_1
+                  "movq   " MASK32_2 ", %%mm2 \n\t" // _mask32_2
+                  "movq   " MASK32_3 ", %%mm3 \n\t" // _mask32_3
+                  RESTORE_rbp
+
+                  "pand      %%mm7, %%mm0     \n\t"
+                  "pand      %%mm7, %%mm1     \n\t"
+                  "pand      %%mm7, %%mm2     \n\t"
+                  "pand      %%mm7, %%mm3     \n\t"
+
+                  "pcmpeqb   %%mm6, %%mm0     \n\t"
+                  "pcmpeqb   %%mm6, %%mm1     \n\t"
+                  "pcmpeqb   %%mm6, %%mm2     \n\t"
+                  "pcmpeqb   %%mm6, %%mm3     \n\t"
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %3       \n\t" // load source
+// preload        "movl      dstptr, %4       \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t" // lcr
+                  "jz        mainloop32end    \n\t"
+
+                "mainloop32:                  \n\t"
+                  "movq      (%3), %%mm4      \n\t"
+                  "pand      %%mm0, %%mm4     \n\t"
+                  "movq      %%mm0, %%mm6     \n\t"
+                  "movq      (%4), %%mm7      \n\t"
+                  "pandn     %%mm7, %%mm6     \n\t"
+                  "por       %%mm6, %%mm4     \n\t"
+                  "movq      %%mm4, (%4)      \n\t"
+
+                  "movq      8(%3), %%mm5     \n\t"
+                  "pand      %%mm1, %%mm5     \n\t"
+                  "movq      %%mm1, %%mm7     \n\t"
+                  "movq      8(%4), %%mm6     \n\t"
+                  "pandn     %%mm6, %%mm7     \n\t"
+                  "por       %%mm7, %%mm5     \n\t"
+                  "movq      %%mm5, 8(%4)     \n\t"
+
+                  "movq      16(%3), %%mm6    \n\t"
+                  "pand      %%mm2, %%mm6     \n\t"
+                  "movq      %%mm2, %%mm4     \n\t"
+                  "movq      16(%4), %%mm7    \n\t"
+                  "pandn     %%mm7, %%mm4     \n\t"
+                  "por       %%mm4, %%mm6     \n\t"
+                  "movq      %%mm6, 16(%4)    \n\t"
+
+                  "movq      24(%3), %%mm7    \n\t"
+                  "pand      %%mm3, %%mm7     \n\t"
+                  "movq      %%mm3, %%mm5     \n\t"
+                  "movq      24(%4), %%mm4    \n\t"
+                  "pandn     %%mm4, %%mm5     \n\t"
+                  "por       %%mm5, %%mm7     \n\t"
+                  "movq      %%mm7, 24(%4)    \n\t"
+
+                  "add       $32, %3          \n\t" // inc by 32 bytes processed
+                  "add       $32, %4          \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+                  "ja        mainloop32       \n\t"
+
+                "mainloop32end:               \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t"
+                  "cmpl      $0, %%ecx        \n\t"
+                  "jz        end32            \n\t"
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // low byte => high byte
+
+                "secondloop32:                \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip32           \n\t" // if CF = 0
+                  "movl      (%3), %%eax      \n\t"
+                  "movl      %%eax, (%4)      \n\t"
+
+                "skip32:                      \n\t"
+                  "add       $4, %3           \n\t"
+                  "add       $4, %4           \n\t"
+                  "decl      %%ecx            \n\t"
+                  "jnz       secondloop32     \n\t"
+
+                "end32:                       \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  : "=a" (dummy_value_a),           // output regs (dummy)
+                    "=d" (dummy_value_d),
+                    "=c" (dummy_value_c),
+                    "=S" (dummy_value_S),
+                    "=D" (dummy_value_D)
+
+                  : "0" (diff),        // eax       // input regs
+                    "1" (mask),        // edx
+                    "2" (len),         // ecx
+// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                    "3" (srcptr),      // esi/rsi
+                    "4" (dstptr)       // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                  : "%mm0", "%mm1", "%mm2", "%mm3"  // clobber list
+                  , "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+               );
+            }
+            else /* not _mmx_supported - use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP4 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP4 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP4;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            } /* end of else (_mmx_supported) */
+
+            break;
+         }       /* end 32 bpp */
+
+         case 8:        /* png_ptr->row_info.pixel_depth (1 byte/pixel) */
+         {
+            png_bytep srcptr;   /* source cursor (starts in png_ptr->row_buf) */
+            png_bytep dstptr;   /* destination cursor (starts in row) */
+
+#if !defined(PNG_1_0_X)
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
+#else
+            if (_mmx_supported)
+#endif
+            {
+               png_uint_32 len;   /* pixels handled 8 at a time by mainloop8 */
+               int diff;          /* 0..7 leftover pixels for secondloop8 */
+
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               len  = png_ptr->width & ~7;          // reduce to multiple of 8
+               diff = (int) (png_ptr->width & 7);   // amount lost
+
+               __asm__ __volatile__ (
+                  "not       %%edx            \n\t" // mask => unmask
+                  "movd      %%edx, %%mm7     \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // unmask => mask for later
+                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
+                  "punpcklbw %%mm7, %%mm7     \n\t" // low byte  -> low word
+                  "punpcklwd %%mm7, %%mm7     \n\t" // low word  -> low dword
+                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
+
+                  LOAD_GOT_rbp
+                  "movq   " MASK8_0 ", %%mm0  \n\t" // _mask8_0 -> mm0
+                  RESTORE_rbp
+
+                  "pand      %%mm7, %%mm0     \n\t" // nonzero if keep byte
+                  "pcmpeqb   %%mm6, %%mm0     \n\t" // zero bytes->0xff, else 0
+
+// preload        "movl      len, %%ecx       \n\t" // load length of line
+// preload        "movl      srcptr, %3       \n\t" // load source
+// preload        "movl      dstptr, %4       \n\t" // load dest
+
+                  "cmpl      $0, %%ecx        \n\t" // len == 0 ?
+                  "je        mainloop8end     \n\t" // yes:  leftovers only
+
+                "mainloop8:                   \n\t"
+                  "movq      (%3), %%mm4      \n\t" // *srcptr
+                  "pand      %%mm0, %%mm4     \n\t" // src bytes selected by mm0
+                  "movq      %%mm0, %%mm6     \n\t" // copy of byte-select mask
+                  "pandn     (%4), %%mm6      \n\t" // *dstptr where not selected
+                  "por       %%mm6, %%mm4     \n\t" // merge src and dst bytes
+                  "movq      %%mm4, (%4)      \n\t" // write back to *dstptr
+                  "add       $8, %3           \n\t" // inc by 8 bytes processed
+                  "add       $8, %4           \n\t"
+                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
+                  "ja        mainloop8        \n\t" // loop while ecx > 0
+
+                "mainloop8end:                \n\t"
+// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
+                  "movl      %%eax, %%ecx     \n\t" // ecx = diff
+                  "cmpl      $0, %%ecx        \n\t" // diff == 0 ?
+                  "jz        end8             \n\t" // yes:  nothing left over
+// preload        "movl      mask, %%edx      \n\t"
+                  "sall      $24, %%edx       \n\t" // move low byte to high byte
+
+                "secondloop8:                 \n\t"
+                  "sall      %%edx            \n\t" // move high bit to CF
+                  "jnc       skip8            \n\t" // if CF = 0, keep *dstptr
+                  "movb      (%3), %%al       \n\t" // copy one byte (pixel)
+                  "movb      %%al, (%4)       \n\t"
+
+                "skip8:                       \n\t"
+                  "inc       %3               \n\t" // advance one pixel
+                  "inc       %4               \n\t"
+                  "decl      %%ecx            \n\t" // one fewer leftover pixel
+                  "jnz       secondloop8      \n\t"
+
+                "end8:                        \n\t"
+                  "EMMS                       \n\t" // DONE
+
+                  : "=a" (dummy_value_a),           // output regs (dummy)
+                    "=d" (dummy_value_d),
+                    "=c" (dummy_value_c),
+                    "=S" (dummy_value_S),
+                    "=D" (dummy_value_D)
+
+                  : "0" (diff),        // eax       // input regs
+                    "1" (mask),        // edx
+                    "2" (len),         // ecx
+// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                    "3" (srcptr),      // esi/rsi
+                    "4" (dstptr)       // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                  : "%mm0", "%mm4", "%mm6", "%mm7"  // clobber list
+#endif
+               );
+            }
+            else /* not _mmx_supported - use modified C routine */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = len;  /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff /* *BPP1 */ ;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i); /* clip at row end */
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+
+            } /* end of else (_mmx_supported) */
+
+            break;
+         }       /* end 8 bpp */
+
          case 1:        /* png_ptr->row_info.pixel_depth */
          {
             png_bytep sp;
@@ -451,16 +1482,16 @@
 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
             if (png_ptr->transformations & PNG_PACKSWAP)
             {
-                s_start = 0;
-                s_end = 7;
-                s_inc = 1;
+               s_start = 0;
+               s_end = 7;
+               s_inc = 1;
             }
             else
 #endif
             {
-                s_start = 7;
-                s_end = 0;
-                s_inc = -1;
+               s_start = 7;
+               s_end = 0;
+               s_inc = -1;
             }
 
             shift = s_start;
@@ -491,7 +1522,7 @@
                   m >>= 1;
             }
             break;
-         }
+         }       /* end 1 bpp */
 
          case 2:        /* png_ptr->row_info.pixel_depth */
          {
@@ -546,7 +1577,7 @@
                   m >>= 1;
             }
             break;
-         }
+         }       /* end 2 bpp */
 
          case 4:        /* png_ptr->row_info.pixel_depth */
          {
@@ -575,6 +1606,7 @@
                s_end = 0;
                s_inc = -4;
             }
+
             shift = s_start;
 
             for (i = 0; i < png_ptr->width; i++)
@@ -600,181 +1632,40 @@
                   m >>= 1;
             }
             break;
-         }
-
-         case 8:        /* png_ptr->row_info.pixel_depth */
-         {
-            png_bytep srcptr;
-            png_bytep dstptr;
-
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
-#if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && _mmx_supported */ )
-#else
-            if (_mmx_supported)
-#endif
-            {
-               png_uint_32 len;
-               int diff;
-               int dummy_value_a;   // fix 'forbidden register spilled' error
-               int dummy_value_d;
-               int dummy_value_c;
-               int dummy_value_S;
-               int dummy_value_D;
-               _unmask = ~mask;            // global variable for -fPIC version
-               srcptr = png_ptr->row_buf + 1;
-               dstptr = row;
-               len  = png_ptr->width &~7;  // reduce to multiple of 8
-               diff = (int) (png_ptr->width & 7);  // amount lost
-
-               __asm__ __volatile__ (
-                  "movd      _unmask, %%mm7  \n\t" // load bit pattern
-                  "psubb     %%mm6, %%mm6    \n\t" // zero mm6
-                  "punpcklbw %%mm7, %%mm7    \n\t"
-                  "punpcklwd %%mm7, %%mm7    \n\t"
-                  "punpckldq %%mm7, %%mm7    \n\t" // fill reg with 8 masks
-
-                  "movq      _mask8_0, %%mm0 \n\t"
-                  "pand      %%mm7, %%mm0    \n\t" // nonzero if keep byte
-                  "pcmpeqb   %%mm6, %%mm0    \n\t" // zeros->1s, v versa
-
-// preload        "movl      len, %%ecx      \n\t" // load length of line
-// preload        "movl      srcptr, %%esi   \n\t" // load source
-// preload        "movl      dstptr, %%edi   \n\t" // load dest
-
-                  "cmpl      $0, %%ecx       \n\t" // len == 0 ?
-                  "je        mainloop8end    \n\t"
-
-                "mainloop8:                  \n\t"
-                  "movq      (%%esi), %%mm4  \n\t" // *srcptr
-                  "pand      %%mm0, %%mm4    \n\t"
-                  "movq      %%mm0, %%mm6    \n\t"
-                  "pandn     (%%edi), %%mm6  \n\t" // *dstptr
-                  "por       %%mm6, %%mm4    \n\t"
-                  "movq      %%mm4, (%%edi)  \n\t"
-                  "addl      $8, %%esi       \n\t" // inc by 8 bytes processed
-                  "addl      $8, %%edi       \n\t"
-                  "subl      $8, %%ecx       \n\t" // dec by 8 pixels processed
-                  "ja        mainloop8       \n\t"
-
-                "mainloop8end:               \n\t"
-// preload        "movl      diff, %%ecx     \n\t" // (diff is in eax)
-                  "movl      %%eax, %%ecx    \n\t"
-                  "cmpl      $0, %%ecx       \n\t"
-                  "jz        end8            \n\t"
-// preload        "movl      mask, %%edx     \n\t"
-                  "sall      $24, %%edx      \n\t" // make low byte, high byte
-
-                "secondloop8:                \n\t"
-                  "sall      %%edx           \n\t" // move high bit to CF
-                  "jnc       skip8           \n\t" // if CF = 0
-                  "movb      (%%esi), %%al   \n\t"
-                  "movb      %%al, (%%edi)   \n\t"
-
-                "skip8:                      \n\t"
-                  "incl      %%esi           \n\t"
-                  "incl      %%edi           \n\t"
-                  "decl      %%ecx           \n\t"
-                  "jnz       secondloop8     \n\t"
-
-                "end8:                       \n\t"
-                  "EMMS                      \n\t"  // DONE
-
-                  : "=a" (dummy_value_a),           // output regs (dummy)
-                    "=d" (dummy_value_d),
-                    "=c" (dummy_value_c),
-                    "=S" (dummy_value_S),
-                    "=D" (dummy_value_D)
-
-                  : "3" (srcptr),      // esi       // input regs
-                    "4" (dstptr),      // edi
-                    "0" (diff),        // eax
-// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
-                    "2" (len),         // ecx
-                    "1" (mask)         // edx
-
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-                  : "%mm0", "%mm4", "%mm6", "%mm7"  // clobber list
-#endif
-               );
-            }
-            else /* mmx _not supported - Use modified C routine */
-#endif /* PNG_MMX_CODE_SUPPORTED */
-            {
-               register png_uint_32 i;
-               png_uint_32 initial_val = png_pass_start[png_ptr->pass];
-                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
-               register int stride = png_pass_inc[png_ptr->pass];
-                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
-               register int rep_bytes = png_pass_width[png_ptr->pass];
-                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
-               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
-               int diff = (int) (png_ptr->width & 7); /* amount lost */
-               register png_uint_32 final_val = len;  /* GRR bugfix */
-
-               srcptr = png_ptr->row_buf + 1 + initial_val;
-               dstptr = row + initial_val;
-
-               for (i = initial_val; i < final_val; i += stride)
-               {
-                  png_memcpy(dstptr, srcptr, rep_bytes);
-                  srcptr += stride;
-                  dstptr += stride;
-               }
-               if (diff)  /* number of leftover pixels:  3 for pngtest */
-               {
-                  final_val+=diff /* *BPP1 */ ;
-                  for (; i < final_val; i += stride)
-                  {
-                     if (rep_bytes > (int)(final_val-i))
-                        rep_bytes = (int)(final_val-i);
-                     png_memcpy(dstptr, srcptr, rep_bytes);
-                     srcptr += stride;
-                     dstptr += stride;
-                  }
-               }
-
-            } /* end of else (_mmx_supported) */
-
-            break;
-         }       /* end 8 bpp */
+         }       /* end 4 bpp */
 
          case 16:       /* png_ptr->row_info.pixel_depth */
          {
             png_bytep srcptr;
             png_bytep dstptr;
 
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
 #if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && _mmx_supported */ )
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
 #else
             if (_mmx_supported)
 #endif
             {
                png_uint_32 len;
                int diff;
-               int dummy_value_a;   // fix 'forbidden register spilled' error
-               int dummy_value_d;
-               int dummy_value_c;
-               int dummy_value_S;
-               int dummy_value_D;
-               _unmask = ~mask;            // global variable for -fPIC version
+
                srcptr = png_ptr->row_buf + 1;
                dstptr = row;
-               len  = png_ptr->width &~7;  // reduce to multiple of 8
-               diff = (int) (png_ptr->width & 7); // amount lost //
+               len  = png_ptr->width & ~7;          // reduce to multiple of 8
+               diff = (int) (png_ptr->width & 7);   // amount lost
 
                __asm__ __volatile__ (
-                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // mask => unmask
+                  "movd      %%edx, %%mm7     \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // unmask => mask for later
                   "psubb     %%mm6, %%mm6     \n\t" // zero mm6
                   "punpcklbw %%mm7, %%mm7     \n\t"
                   "punpcklwd %%mm7, %%mm7     \n\t"
                   "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
 
-                  "movq      _mask16_0, %%mm0 \n\t"
-                  "movq      _mask16_1, %%mm1 \n\t"
+                  LOAD_GOT_rbp
+                  "movq   " MASK16_0 ", %%mm0 \n\t" // _mask16_0 -> mm0
+                  "movq   " MASK16_1 ", %%mm1 \n\t" // _mask16_1 -> mm1
+                  RESTORE_rbp
 
                   "pand      %%mm7, %%mm0     \n\t"
                   "pand      %%mm7, %%mm1     \n\t"
@@ -783,31 +1674,31 @@
                   "pcmpeqb   %%mm6, %%mm1     \n\t"
 
 // preload        "movl      len, %%ecx       \n\t" // load length of line
-// preload        "movl      srcptr, %%esi    \n\t" // load source
-// preload        "movl      dstptr, %%edi    \n\t" // load dest
+// preload        "movl      srcptr, %3       \n\t" // load source
+// preload        "movl      dstptr, %4       \n\t" // load dest
 
                   "cmpl      $0, %%ecx        \n\t"
                   "jz        mainloop16end    \n\t"
 
                 "mainloop16:                  \n\t"
-                  "movq      (%%esi), %%mm4   \n\t"
+                  "movq      (%3), %%mm4      \n\t"
                   "pand      %%mm0, %%mm4     \n\t"
                   "movq      %%mm0, %%mm6     \n\t"
-                  "movq      (%%edi), %%mm7   \n\t"
+                  "movq      (%4), %%mm7      \n\t"
                   "pandn     %%mm7, %%mm6     \n\t"
                   "por       %%mm6, %%mm4     \n\t"
-                  "movq      %%mm4, (%%edi)   \n\t"
+                  "movq      %%mm4, (%4)      \n\t"
 
-                  "movq      8(%%esi), %%mm5  \n\t"
+                  "movq      8(%3), %%mm5     \n\t"
                   "pand      %%mm1, %%mm5     \n\t"
                   "movq      %%mm1, %%mm7     \n\t"
-                  "movq      8(%%edi), %%mm6  \n\t"
+                  "movq      8(%4), %%mm6     \n\t"
                   "pandn     %%mm6, %%mm7     \n\t"
                   "por       %%mm7, %%mm5     \n\t"
-                  "movq      %%mm5, 8(%%edi)  \n\t"
+                  "movq      %%mm5, 8(%4)     \n\t"
 
-                  "addl      $16, %%esi       \n\t" // inc by 16 bytes processed
-                  "addl      $16, %%edi       \n\t"
+                  "add       $16, %3          \n\t" // inc by 16 bytes processed
+                  "add       $16, %4          \n\t"
                   "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
                   "ja        mainloop16       \n\t"
 
@@ -822,12 +1713,12 @@
                 "secondloop16:                \n\t"
                   "sall      %%edx            \n\t" // move high bit to CF
                   "jnc       skip16           \n\t" // if CF = 0
-                  "movw      (%%esi), %%ax    \n\t"
-                  "movw      %%ax, (%%edi)    \n\t"
+                  "movw      (%3), %%ax       \n\t"
+                  "movw      %%ax, (%4)       \n\t"
 
                 "skip16:                      \n\t"
-                  "addl      $2, %%esi        \n\t"
-                  "addl      $2, %%edi        \n\t"
+                  "add       $2, %3           \n\t"
+                  "add       $2, %4           \n\t"
                   "decl      %%ecx            \n\t"
                   "jnz       secondloop16     \n\t"
 
@@ -835,26 +1726,25 @@
                   "EMMS                       \n\t" // DONE
 
                   : "=a" (dummy_value_a),           // output regs (dummy)
-                    "=c" (dummy_value_c),
                     "=d" (dummy_value_d),
+                    "=c" (dummy_value_c),
                     "=S" (dummy_value_S),
                     "=D" (dummy_value_D)
 
                   : "0" (diff),        // eax       // input regs
-// was (unmask)     " "    RESERVED    // ebx       // Global Offset Table idx
-                    "1" (len),         // ecx
-                    "2" (mask),        // edx
-                    "3" (srcptr),      // esi
-                    "4" (dstptr)       // edi
+                    "1" (mask),        // edx
+                    "2" (len),         // ecx
+// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                    "3" (srcptr),      // esi/rsi
+                    "4" (dstptr)       // edi/rdi
 
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                   : "%mm0", "%mm1", "%mm4"          // clobber list
                   , "%mm5", "%mm6", "%mm7"
 #endif
                );
             }
-            else /* mmx _not supported - Use modified C routine */
-#endif /* PNG_MMX_CODE_SUPPORTED */
+            else /* not _mmx_supported - use modified C routine */
             {
                register png_uint_32 i;
                png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
@@ -878,7 +1768,7 @@
                }
                if (diff)  /* number of leftover pixels:  3 for pngtest */
                {
-                  final_val+=diff*BPP2;
+                  final_val += diff*BPP2;
                   for (; i < final_val; i += stride)
                   {
                      if (rep_bytes > (int)(final_val-i))
@@ -893,388 +1783,42 @@
             break;
          }       /* end 16 bpp */
 
-         case 24:       /* png_ptr->row_info.pixel_depth */
-         {
-            png_bytep srcptr;
-            png_bytep dstptr;
-
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
-#if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && _mmx_supported */ )
-#else
-            if (_mmx_supported)
-#endif
-            {
-               png_uint_32 len;
-               int diff;
-               int dummy_value_a;   // fix 'forbidden register spilled' error
-               int dummy_value_d;
-               int dummy_value_c;
-               int dummy_value_S;
-               int dummy_value_D;
-               _unmask = ~mask;            // global variable for -fPIC version
-               srcptr = png_ptr->row_buf + 1;
-               dstptr = row;
-               len  = png_ptr->width &~7;  // reduce to multiple of 8
-               diff = (int) (png_ptr->width & 7); // amount lost //
-
-               __asm__ __volatile__ (
-                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
-                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
-                  "punpcklbw %%mm7, %%mm7     \n\t"
-                  "punpcklwd %%mm7, %%mm7     \n\t"
-                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
-
-                  "movq      _mask24_0, %%mm0 \n\t"
-                  "movq      _mask24_1, %%mm1 \n\t"
-                  "movq      _mask24_2, %%mm2 \n\t"
-
-                  "pand      %%mm7, %%mm0     \n\t"
-                  "pand      %%mm7, %%mm1     \n\t"
-                  "pand      %%mm7, %%mm2     \n\t"
-
-                  "pcmpeqb   %%mm6, %%mm0     \n\t"
-                  "pcmpeqb   %%mm6, %%mm1     \n\t"
-                  "pcmpeqb   %%mm6, %%mm2     \n\t"
-
-// preload        "movl      len, %%ecx       \n\t" // load length of line
-// preload        "movl      srcptr, %%esi    \n\t" // load source
-// preload        "movl      dstptr, %%edi    \n\t" // load dest
-
-                  "cmpl      $0, %%ecx        \n\t"
-                  "jz        mainloop24end    \n\t"
-
-                "mainloop24:                  \n\t"
-                  "movq      (%%esi), %%mm4   \n\t"
-                  "pand      %%mm0, %%mm4     \n\t"
-                  "movq      %%mm0, %%mm6     \n\t"
-                  "movq      (%%edi), %%mm7   \n\t"
-                  "pandn     %%mm7, %%mm6     \n\t"
-                  "por       %%mm6, %%mm4     \n\t"
-                  "movq      %%mm4, (%%edi)   \n\t"
-
-                  "movq      8(%%esi), %%mm5  \n\t"
-                  "pand      %%mm1, %%mm5     \n\t"
-                  "movq      %%mm1, %%mm7     \n\t"
-                  "movq      8(%%edi), %%mm6  \n\t"
-                  "pandn     %%mm6, %%mm7     \n\t"
-                  "por       %%mm7, %%mm5     \n\t"
-                  "movq      %%mm5, 8(%%edi)  \n\t"
-
-                  "movq      16(%%esi), %%mm6 \n\t"
-                  "pand      %%mm2, %%mm6     \n\t"
-                  "movq      %%mm2, %%mm4     \n\t"
-                  "movq      16(%%edi), %%mm7 \n\t"
-                  "pandn     %%mm7, %%mm4     \n\t"
-                  "por       %%mm4, %%mm6     \n\t"
-                  "movq      %%mm6, 16(%%edi) \n\t"
-
-                  "addl      $24, %%esi       \n\t" // inc by 24 bytes processed
-                  "addl      $24, %%edi       \n\t"
-                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
-
-                  "ja        mainloop24       \n\t"
-
-                "mainloop24end:               \n\t"
-// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
-                  "movl      %%eax, %%ecx     \n\t"
-                  "cmpl      $0, %%ecx        \n\t"
-                  "jz        end24            \n\t"
-// preload        "movl      mask, %%edx      \n\t"
-                  "sall      $24, %%edx       \n\t" // make low byte, high byte
-
-                "secondloop24:                \n\t"
-                  "sall      %%edx            \n\t" // move high bit to CF
-                  "jnc       skip24           \n\t" // if CF = 0
-                  "movw      (%%esi), %%ax    \n\t"
-                  "movw      %%ax, (%%edi)    \n\t"
-                  "xorl      %%eax, %%eax     \n\t"
-                  "movb      2(%%esi), %%al   \n\t"
-                  "movb      %%al, 2(%%edi)   \n\t"
-
-                "skip24:                      \n\t"
-                  "addl      $3, %%esi        \n\t"
-                  "addl      $3, %%edi        \n\t"
-                  "decl      %%ecx            \n\t"
-                  "jnz       secondloop24     \n\t"
-
-                "end24:                       \n\t"
-                  "EMMS                       \n\t" // DONE
-
-                  : "=a" (dummy_value_a),           // output regs (dummy)
-                    "=d" (dummy_value_d),
-                    "=c" (dummy_value_c),
-                    "=S" (dummy_value_S),
-                    "=D" (dummy_value_D)
-
-                  : "3" (srcptr),      // esi       // input regs
-                    "4" (dstptr),      // edi
-                    "0" (diff),        // eax
-// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
-                    "2" (len),         // ecx
-                    "1" (mask)         // edx
-
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-                  : "%mm0", "%mm1", "%mm2"          // clobber list
-                  , "%mm4", "%mm5", "%mm6", "%mm7"
-#endif
-               );
-            }
-            else /* mmx _not supported - Use modified C routine */
-#endif /* PNG_MMX_CODE_SUPPORTED */
-            {
-               register png_uint_32 i;
-               png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
-                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
-               register int stride = BPP3 * png_pass_inc[png_ptr->pass];
-                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
-               register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
-                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
-               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
-               int diff = (int) (png_ptr->width & 7); /* amount lost */
-               register png_uint_32 final_val = BPP3 * len;   /* GRR bugfix */
-
-               srcptr = png_ptr->row_buf + 1 + initial_val;
-               dstptr = row + initial_val;
-
-               for (i = initial_val; i < final_val; i += stride)
-               {
-                  png_memcpy(dstptr, srcptr, rep_bytes);
-                  srcptr += stride;
-                  dstptr += stride;
-               }
-               if (diff)  /* number of leftover pixels:  3 for pngtest */
-               {
-                  final_val+=diff*BPP3;
-                  for (; i < final_val; i += stride)
-                  {
-                     if (rep_bytes > (int)(final_val-i))
-                        rep_bytes = (int)(final_val-i);
-                     png_memcpy(dstptr, srcptr, rep_bytes);
-                     srcptr += stride;
-                     dstptr += stride;
-                  }
-               }
-            } /* end of else (_mmx_supported) */
-
-            break;
-         }       /* end 24 bpp */
-
-         case 32:       /* png_ptr->row_info.pixel_depth */
-         {
-            png_bytep srcptr;
-            png_bytep dstptr;
-
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
-#if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && _mmx_supported */ )
-#else
-            if (_mmx_supported)
-#endif
-            {
-               png_uint_32 len;
-               int diff;
-               int dummy_value_a;   // fix 'forbidden register spilled' error
-               int dummy_value_d;
-               int dummy_value_c;
-               int dummy_value_S;
-               int dummy_value_D;
-               _unmask = ~mask;            // global variable for -fPIC version
-               srcptr = png_ptr->row_buf + 1;
-               dstptr = row;
-               len  = png_ptr->width &~7;  // reduce to multiple of 8
-               diff = (int) (png_ptr->width & 7); // amount lost //
-
-               __asm__ __volatile__ (
-                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
-                  "psubb     %%mm6, %%mm6     \n\t" // zero mm6
-                  "punpcklbw %%mm7, %%mm7     \n\t"
-                  "punpcklwd %%mm7, %%mm7     \n\t"
-                  "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
-
-                  "movq      _mask32_0, %%mm0 \n\t"
-                  "movq      _mask32_1, %%mm1 \n\t"
-                  "movq      _mask32_2, %%mm2 \n\t"
-                  "movq      _mask32_3, %%mm3 \n\t"
-
-                  "pand      %%mm7, %%mm0     \n\t"
-                  "pand      %%mm7, %%mm1     \n\t"
-                  "pand      %%mm7, %%mm2     \n\t"
-                  "pand      %%mm7, %%mm3     \n\t"
-
-                  "pcmpeqb   %%mm6, %%mm0     \n\t"
-                  "pcmpeqb   %%mm6, %%mm1     \n\t"
-                  "pcmpeqb   %%mm6, %%mm2     \n\t"
-                  "pcmpeqb   %%mm6, %%mm3     \n\t"
-
-// preload        "movl      len, %%ecx       \n\t" // load length of line
-// preload        "movl      srcptr, %%esi    \n\t" // load source
-// preload        "movl      dstptr, %%edi    \n\t" // load dest
-
-                  "cmpl      $0, %%ecx        \n\t" // lcr
-                  "jz        mainloop32end    \n\t"
-
-                "mainloop32:                  \n\t"
-                  "movq      (%%esi), %%mm4   \n\t"
-                  "pand      %%mm0, %%mm4     \n\t"
-                  "movq      %%mm0, %%mm6     \n\t"
-                  "movq      (%%edi), %%mm7   \n\t"
-                  "pandn     %%mm7, %%mm6     \n\t"
-                  "por       %%mm6, %%mm4     \n\t"
-                  "movq      %%mm4, (%%edi)   \n\t"
-
-                  "movq      8(%%esi), %%mm5  \n\t"
-                  "pand      %%mm1, %%mm5     \n\t"
-                  "movq      %%mm1, %%mm7     \n\t"
-                  "movq      8(%%edi), %%mm6  \n\t"
-                  "pandn     %%mm6, %%mm7     \n\t"
-                  "por       %%mm7, %%mm5     \n\t"
-                  "movq      %%mm5, 8(%%edi)  \n\t"
-
-                  "movq      16(%%esi), %%mm6 \n\t"
-                  "pand      %%mm2, %%mm6     \n\t"
-                  "movq      %%mm2, %%mm4     \n\t"
-                  "movq      16(%%edi), %%mm7 \n\t"
-                  "pandn     %%mm7, %%mm4     \n\t"
-                  "por       %%mm4, %%mm6     \n\t"
-                  "movq      %%mm6, 16(%%edi) \n\t"
-
-                  "movq      24(%%esi), %%mm7 \n\t"
-                  "pand      %%mm3, %%mm7     \n\t"
-                  "movq      %%mm3, %%mm5     \n\t"
-                  "movq      24(%%edi), %%mm4 \n\t"
-                  "pandn     %%mm4, %%mm5     \n\t"
-                  "por       %%mm5, %%mm7     \n\t"
-                  "movq      %%mm7, 24(%%edi) \n\t"
-
-                  "addl      $32, %%esi       \n\t" // inc by 32 bytes processed
-                  "addl      $32, %%edi       \n\t"
-                  "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
-                  "ja        mainloop32       \n\t"
-
-                "mainloop32end:               \n\t"
-// preload        "movl      diff, %%ecx      \n\t" // (diff is in eax)
-                  "movl      %%eax, %%ecx     \n\t"
-                  "cmpl      $0, %%ecx        \n\t"
-                  "jz        end32            \n\t"
-// preload        "movl      mask, %%edx      \n\t"
-                  "sall      $24, %%edx       \n\t" // low byte => high byte
-
-                "secondloop32:                \n\t"
-                  "sall      %%edx            \n\t" // move high bit to CF
-                  "jnc       skip32           \n\t" // if CF = 0
-                  "movl      (%%esi), %%eax   \n\t"
-                  "movl      %%eax, (%%edi)   \n\t"
-
-                "skip32:                      \n\t"
-                  "addl      $4, %%esi        \n\t"
-                  "addl      $4, %%edi        \n\t"
-                  "decl      %%ecx            \n\t"
-                  "jnz       secondloop32     \n\t"
-
-                "end32:                       \n\t"
-                  "EMMS                       \n\t" // DONE
-
-                  : "=a" (dummy_value_a),           // output regs (dummy)
-                    "=d" (dummy_value_d),
-                    "=c" (dummy_value_c),
-                    "=S" (dummy_value_S),
-                    "=D" (dummy_value_D)
-
-                  : "3" (srcptr),      // esi       // input regs
-                    "4" (dstptr),      // edi
-                    "0" (diff),        // eax
-// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
-                    "2" (len),         // ecx
-                    "1" (mask)         // edx
-
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-                  : "%mm0", "%mm1", "%mm2", "%mm3"  // clobber list
-                  , "%mm4", "%mm5", "%mm6", "%mm7"
-#endif
-               );
-            }
-            else /* mmx _not supported - Use modified C routine */
-#endif /* PNG_MMX_CODE_SUPPORTED */
-            {
-               register png_uint_32 i;
-               png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
-                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
-               register int stride = BPP4 * png_pass_inc[png_ptr->pass];
-                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
-               register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
-                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
-               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
-               int diff = (int) (png_ptr->width & 7); /* amount lost */
-               register png_uint_32 final_val = BPP4 * len;   /* GRR bugfix */
-
-               srcptr = png_ptr->row_buf + 1 + initial_val;
-               dstptr = row + initial_val;
-
-               for (i = initial_val; i < final_val; i += stride)
-               {
-                  png_memcpy(dstptr, srcptr, rep_bytes);
-                  srcptr += stride;
-                  dstptr += stride;
-               }
-               if (diff)  /* number of leftover pixels:  3 for pngtest */
-               {
-                  final_val+=diff*BPP4;
-                  for (; i < final_val; i += stride)
-                  {
-                     if (rep_bytes > (int)(final_val-i))
-                        rep_bytes = (int)(final_val-i);
-                     png_memcpy(dstptr, srcptr, rep_bytes);
-                     srcptr += stride;
-                     dstptr += stride;
-                  }
-               }
-            } /* end of else (_mmx_supported) */
-
-            break;
-         }       /* end 32 bpp */
-
          case 48:       /* png_ptr->row_info.pixel_depth */
          {
             png_bytep srcptr;
             png_bytep dstptr;
 
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
 #if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && _mmx_supported */ )
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
 #else
             if (_mmx_supported)
 #endif
             {
                png_uint_32 len;
                int diff;
-               int dummy_value_a;   // fix 'forbidden register spilled' error
-               int dummy_value_d;
-               int dummy_value_c;
-               int dummy_value_S;
-               int dummy_value_D;
-               _unmask = ~mask;            // global variable for -fPIC version
+
                srcptr = png_ptr->row_buf + 1;
                dstptr = row;
-               len  = png_ptr->width &~7;  // reduce to multiple of 8
-               diff = (int) (png_ptr->width & 7); // amount lost //
+               len  = png_ptr->width & ~7;          // reduce to multiple of 8
+               diff = (int) (png_ptr->width & 7);   // amount lost
 
                __asm__ __volatile__ (
-                  "movd      _unmask, %%mm7   \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // mask => unmask
+                  "movd      %%edx, %%mm7     \n\t" // load bit pattern
+                  "not       %%edx            \n\t" // unmask => mask for later
                   "psubb     %%mm6, %%mm6     \n\t" // zero mm6
                   "punpcklbw %%mm7, %%mm7     \n\t"
                   "punpcklwd %%mm7, %%mm7     \n\t"
                   "punpckldq %%mm7, %%mm7     \n\t" // fill reg with 8 masks
 
-                  "movq      _mask48_0, %%mm0 \n\t"
-                  "movq      _mask48_1, %%mm1 \n\t"
-                  "movq      _mask48_2, %%mm2 \n\t"
-                  "movq      _mask48_3, %%mm3 \n\t"
-                  "movq      _mask48_4, %%mm4 \n\t"
-                  "movq      _mask48_5, %%mm5 \n\t"
+                  LOAD_GOT_rbp
+                  "movq   " MASK48_0 ", %%mm0 \n\t" // _mask48_0 -> mm0
+                  "movq   " MASK48_1 ", %%mm1 \n\t" // _mask48_1 -> mm1
+                  "movq   " MASK48_2 ", %%mm2 \n\t" // _mask48_2 -> mm2
+                  "movq   " MASK48_3 ", %%mm3 \n\t" // _mask48_3 -> mm3
+                  "movq   " MASK48_4 ", %%mm4 \n\t" // _mask48_4 -> mm4
+                  "movq   " MASK48_5 ", %%mm5 \n\t" // _mask48_5 -> mm5
+                  RESTORE_rbp
 
                   "pand      %%mm7, %%mm0     \n\t"
                   "pand      %%mm7, %%mm1     \n\t"
@@ -1291,57 +1835,57 @@
                   "pcmpeqb   %%mm6, %%mm5     \n\t"
 
 // preload        "movl      len, %%ecx       \n\t" // load length of line
-// preload        "movl      srcptr, %%esi    \n\t" // load source
-// preload        "movl      dstptr, %%edi    \n\t" // load dest
+// preload        "movl      srcptr, %3       \n\t" // load source
+// preload        "movl      dstptr, %4       \n\t" // load dest
 
                   "cmpl      $0, %%ecx        \n\t"
                   "jz        mainloop48end    \n\t"
 
                 "mainloop48:                  \n\t"
-                  "movq      (%%esi), %%mm7   \n\t"
+                  "movq      (%3), %%mm7      \n\t"
                   "pand      %%mm0, %%mm7     \n\t"
                   "movq      %%mm0, %%mm6     \n\t"
-                  "pandn     (%%edi), %%mm6   \n\t"
+                  "pandn     (%4), %%mm6      \n\t"
                   "por       %%mm6, %%mm7     \n\t"
-                  "movq      %%mm7, (%%edi)   \n\t"
+                  "movq      %%mm7, (%4)      \n\t"
 
-                  "movq      8(%%esi), %%mm6  \n\t"
+                  "movq      8(%3), %%mm6     \n\t"
                   "pand      %%mm1, %%mm6     \n\t"
                   "movq      %%mm1, %%mm7     \n\t"
-                  "pandn     8(%%edi), %%mm7  \n\t"
+                  "pandn     8(%4), %%mm7     \n\t"
                   "por       %%mm7, %%mm6     \n\t"
-                  "movq      %%mm6, 8(%%edi)  \n\t"
+                  "movq      %%mm6, 8(%4)     \n\t"
 
-                  "movq      16(%%esi), %%mm6 \n\t"
+                  "movq      16(%3), %%mm6    \n\t"
                   "pand      %%mm2, %%mm6     \n\t"
                   "movq      %%mm2, %%mm7     \n\t"
-                  "pandn     16(%%edi), %%mm7 \n\t"
+                  "pandn     16(%4), %%mm7    \n\t"
                   "por       %%mm7, %%mm6     \n\t"
-                  "movq      %%mm6, 16(%%edi) \n\t"
+                  "movq      %%mm6, 16(%4)    \n\t"
 
-                  "movq      24(%%esi), %%mm7 \n\t"
+                  "movq      24(%3), %%mm7    \n\t"
                   "pand      %%mm3, %%mm7     \n\t"
                   "movq      %%mm3, %%mm6     \n\t"
-                  "pandn     24(%%edi), %%mm6 \n\t"
+                  "pandn     24(%4), %%mm6    \n\t"
                   "por       %%mm6, %%mm7     \n\t"
-                  "movq      %%mm7, 24(%%edi) \n\t"
+                  "movq      %%mm7, 24(%4)    \n\t"
 
-                  "movq      32(%%esi), %%mm6 \n\t"
+                  "movq      32(%3), %%mm6    \n\t"
                   "pand      %%mm4, %%mm6     \n\t"
                   "movq      %%mm4, %%mm7     \n\t"
-                  "pandn     32(%%edi), %%mm7 \n\t"
+                  "pandn     32(%4), %%mm7    \n\t"
                   "por       %%mm7, %%mm6     \n\t"
-                  "movq      %%mm6, 32(%%edi) \n\t"
+                  "movq      %%mm6, 32(%4)    \n\t"
 
-                  "movq      40(%%esi), %%mm7 \n\t"
+                  "movq      40(%3), %%mm7    \n\t"
                   "pand      %%mm5, %%mm7     \n\t"
                   "movq      %%mm5, %%mm6     \n\t"
-                  "pandn     40(%%edi), %%mm6 \n\t"
+                  "pandn     40(%4), %%mm6    \n\t"
                   "por       %%mm6, %%mm7     \n\t"
-                  "movq      %%mm7, 40(%%edi) \n\t"
+                  "movq      %%mm7, 40(%4)    \n\t"
 
-                  "addl      $48, %%esi       \n\t" // inc by 48 bytes processed
-                  "addl      $48, %%edi       \n\t"
+                  "add       $48, %3          \n\t" // inc by 48 bytes processed
+                  "add       $48, %4          \n\t"
                   "subl      $8, %%ecx        \n\t" // dec by 8 pixels processed
 
                   "ja        mainloop48       \n\t"
@@ -1357,12 +1901,14 @@
                 "secondloop48:                \n\t"
                   "sall      %%edx            \n\t" // move high bit to CF
                   "jnc       skip48           \n\t" // if CF = 0
-                  "movl      (%%esi), %%eax   \n\t"
-                  "movl      %%eax, (%%edi)   \n\t"
+                  "movl      (%3), %%eax      \n\t"
+                  "movl      %%eax, (%4)      \n\t"
+                  "movw      4(%3), %%ax      \n\t" // GR-P bugfix 20070717
+                  "movw      %%ax, 4(%4)      \n\t" // GR-P bugfix 20070717
 
                 "skip48:                      \n\t"
-                  "addl      $4, %%esi        \n\t"
-                  "addl      $4, %%edi        \n\t"
+                  "add       $6, %3           \n\t" // GR-P bugfix 20070717
+                  "add       $6, %4           \n\t" // GR-P bugfix 20070717
                   "decl      %%ecx            \n\t"
                   "jnz       secondloop48     \n\t"
 
@@ -1375,21 +1921,20 @@
                     "=S" (dummy_value_S),
                     "=D" (dummy_value_D)
 
-                  : "3" (srcptr),      // esi       // input regs
-                    "4" (dstptr),      // edi
-                    "0" (diff),        // eax
-// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                  : "0" (diff),        // eax       // input regs
+                    "1" (mask),        // edx
                     "2" (len),         // ecx
-                    "1" (mask)         // edx
+// was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx
+                    "3" (srcptr),      // esi/rsi
+                    "4" (dstptr)       // edi/rdi
 
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                   : "%mm0", "%mm1", "%mm2", "%mm3"  // clobber list
                   , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
                );
             }
-            else /* mmx _not supported - Use modified C routine */
-#endif /* PNG_MMX_CODE_SUPPORTED */
+            else /* not _mmx_supported - use modified C routine */
             {
                register png_uint_32 i;
                png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
@@ -1413,7 +1958,7 @@
                }
                if (diff)  /* number of leftover pixels:  3 for pngtest */
                {
-                  final_val+=diff*BPP6;
+                  final_val += diff*BPP6;
                   for (; i < final_val; i += stride)
                   {
                      if (rep_bytes > (int)(final_val-i))
@@ -1454,7 +1999,7 @@
             }
             if (diff)  /* number of leftover pixels:  3 for pngtest */
             {
-               final_val+=diff*BPP8;
+               final_val += diff*BPP8;
                for (; i < final_val; i += stride)
                {
                   if (rep_bytes > (int)(final_val-i))
@@ -1470,8 +2015,11 @@
 
          default: /* png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64 */
          {
-            /* this should never happen */
-            png_warning(png_ptr, "Invalid row_info.pixel_depth in pnggccrd");
+            // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+            png_debug(1, "Internal libpng logic error (GCC "
+              "png_combine_row() pixel_depth)\n");
+#endif
             break;
          }
       } /* end switch (png_ptr->row_info.pixel_depth) */
@@ -1510,7 +2058,6 @@
 
    png_debug(1, "in png_do_read_interlace (pnggccrd.c)\n");
 
-#if defined(PNG_MMX_CODE_SUPPORTED)
    if (_mmx_supported == 2) {
 #if !defined(PNG_1_0_X)
        /* this should have happened in png_init_mmx_flags() already */
@@ -1518,7 +2065,6 @@
 #endif
        png_mmx_support();
    }
-#endif
 
    if (row != NULL && row_info != NULL)
    {
@@ -1702,12 +2248,6 @@
 
          default: /* 8-bit or larger (this is where the routine is modified) */
          {
-#if 0
-//          static unsigned long long _const4 = 0x0000000000FFFFFFLL;  no good
-//          static unsigned long long const4 = 0x0000000000FFFFFFLL;   no good
-//          unsigned long long _const4 = 0x0000000000FFFFFFLL;         no good
-//          unsigned long long const4 = 0x0000000000FFFFFFLL;          no good
-#endif
             png_bytep sptr, dp;
             png_uint_32 i;
             png_size_t pixel_bytes;
@@ -1723,30 +2263,150 @@
 
             /* New code by Nirav Chhatrapati - Intel Corporation */
 
-#if defined(PNG_MMX_CODE_SUPPORTED)
 #if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_INTERLACE)
-                /* && _mmx_supported */ )
+            if (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_INTERLACE)
 #else
             if (_mmx_supported)
 #endif
             {
-               //--------------------------------------------------------------
-               if (pixel_bytes == 3)
-               {
-                  if (((pass == 0) || (pass == 1)) && width)
-                  {
-                     int dummy_value_c;   // fix 'forbidden register spilled'
-                     int dummy_value_S;
-                     int dummy_value_D;
-                     int dummy_value_a;
+               int dummy_value_c;        // fix 'forbidden register spilled'
+               png_bytep dummy_value_S;
+               png_bytep dummy_value_D;
+               png_bytep dummy_value_a;
+               png_bytep dummy_value_d;
 
+               //--------------------------------------------------------------
+               if (pixel_bytes == BPP3)
+               {
+                  if (((pass == 4) || (pass == 5)) && width)
+                  {
+                     int width_mmx = ((width >> 1) << 1) - 8;   // GRR:  huh?
+                     if (width_mmx < 0)
+                         width_mmx = 0;
+                     width -= width_mmx;        // 8 or 9 pix, 24 or 27 bytes
+                     if (width_mmx)
+                     {
+                        // png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+                        // sptr points at last pixel in pre-expanded row
+                        // dp points at last pixel position in expanded row
+                        __asm__ __volatile__ (
+                           "sub  $3, %1             \n\t"
+                           "sub  $9, %2             \n\t"
+                                        // (png_pass_inc[pass] + 1)*pixel_bytes
+
+                        ".loop3_pass4:              \n\t"
+                           "movq (%1), %%mm0        \n\t" // x x 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // x x 5 4 3 2 1 0
+                           "movq %%mm0, %%mm2       \n\t" // x x 5 4 3 2 1 0
+                           "psllq $24, %%mm0        \n\t" // 4 3 2 1 0 z z z
+                           "pand (%3), %%mm1        \n\t" // z z z z z 2 1 0
+                           "psrlq $24, %%mm2        \n\t" // z z z x x 5 4 3
+                           "por %%mm1, %%mm0        \n\t" // 4 3 2 1 0 2 1 0
+                           "movq %%mm2, %%mm3       \n\t" // z z z x x 5 4 3
+                           "psllq $8, %%mm2         \n\t" // z z x x 5 4 3 z
+                           "movq %%mm0, (%2)        \n\t"
+                           "psrlq $16, %%mm3        \n\t" // z z z z z x x 5
+                           "pand (%4), %%mm3        \n\t" // z z z z z z z 5
+                           "por %%mm3, %%mm2        \n\t" // z z x x 5 4 3 5
+                           "sub  $6, %1             \n\t"
+                           "movd %%mm2, 8(%2)       \n\t"
+                           "sub  $12, %2            \n\t"
+                           "subl $2, %%ecx          \n\t"
+                           "jnz .loop3_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           : "=c" (dummy_value_c),        // output regs (dummy)
+                             "=S" (dummy_value_S),
+                             "=D" (dummy_value_D),
+                             "=a" (dummy_value_a),
+                             "=d" (dummy_value_d)
+
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp),            // edi/rdi
+#if defined(PNG_x86_64_USE_GOTPCREL)     // formerly _const4 and _const6:
+                             "3" (&_c64._amask5_3_0), // (0x0000000000FFFFFFLL)
+                             "4" (&_c64._amask7_1_0)  // (0x00000000000000FFLL)
+#else
+                             "3" (&_amask5_3_0),  // eax (0x0000000000FFFFFFLL)
+                             "4" (&_amask7_1_0)   // edx (0x00000000000000FFLL)
+#endif
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
+                           , "%mm2", "%mm3"
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx*BPP3;
+                     dp -= width_mmx*2*BPP3;
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+
+                        png_memcpy(v, sptr, BPP3);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           png_memcpy(dp, v, BPP3);
+                           dp -= BPP3;
+                        }
+                        sptr -= BPP3;
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
                      __asm__ __volatile__ (
-                        "subl $21, %%edi         \n\t"
+                        "sub  $9, %2             \n\t"
+                                     // (png_pass_inc[pass] - 1)*pixel_bytes
+
+                     ".loop3_pass2:              \n\t"
+                        "movd (%1), %%mm0        \n\t" // x x x x x 2 1 0
+                        "pand (%3), %%mm0        \n\t" // z z z z z 2 1 0
+                        "movq %%mm0, %%mm1       \n\t" // z z z z z 2 1 0
+                        "psllq $16, %%mm0        \n\t" // z z z 2 1 0 z z
+                        "movq %%mm0, %%mm2       \n\t" // z z z 2 1 0 z z
+                        "psllq $24, %%mm0        \n\t" // 2 1 0 z z z z z
+                        "psrlq $8, %%mm1         \n\t" // z z z z z z 2 1
+                        "por %%mm2, %%mm0        \n\t" // 2 1 0 2 1 0 z z
+                        "por %%mm1, %%mm0        \n\t" // 2 1 0 2 1 0 2 1
+                        "movq %%mm0, 4(%2)       \n\t"
+                        "psrlq $16, %%mm0        \n\t" // z z 2 1 0 2 1 0
+                        "sub  $3, %1             \n\t"
+                        "movd %%mm0, (%2)        \n\t"
+                        "sub  $12, %2            \n\t"
+                        "decl %%ecx              \n\t"
+                        "jnz .loop3_pass2        \n\t"
+                        "EMMS                    \n\t" // DONE
+
+                        : "=c" (dummy_value_c),        // output regs (dummy)
+                          "=S" (dummy_value_S),
+                          "=D" (dummy_value_D),
+                          "=a" (dummy_value_a)
+
+                        : "0" (width),         // ecx  // input regs
+                          "1" (sptr),          // esi/rsi
+                          "2" (dp),            // edi/rdi
+#if defined(PNG_x86_64_USE_GOTPCREL)           // formerly _const4:
+                          "3" (&_c64._amask5_3_0)  // (0x0000000000FFFFFFLL)
+#else
+                          "3" (&_amask5_3_0)   // eax (0x0000000000FFFFFFLL)
+#endif
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                        : "%mm0", "%mm1", "%mm2"       // clobber list (MMX regs written by .loop3_pass2)
+#endif /* PNG_CLOBBER_MMX_REGS_SUPPORTED */
+                     );
+                  }
+                  else if (width)  // && ((pass == 0) || (pass == 1))
+                  {
+                     __asm__ __volatile__ (
+                        "sub  $21, %2            \n\t"
                                      // (png_pass_inc[pass] - 1)*pixel_bytes
 
                      ".loop3_pass0:              \n\t"
-                        "movd (%%esi), %%mm0     \n\t" // x x x x x 2 1 0
+                        "movd (%1), %%mm0        \n\t" // x x x x x 2 1 0
                         "pand (%3), %%mm0        \n\t" // z z z z z 2 1 0
                         "movq %%mm0, %%mm1       \n\t" // z z z z z 2 1 0
                         "psllq $16, %%mm0        \n\t" // z z z 2 1 0 z z
@@ -1759,13 +2419,13 @@
                         "psllq $16, %%mm0        \n\t" // 0 2 1 0 2 1 z z
                         "movq %%mm3, %%mm4       \n\t" // 2 1 0 2 1 0 2 1
                         "punpckhdq %%mm0, %%mm3  \n\t" // 0 2 1 0 2 1 0 2
-                        "movq %%mm4, 16(%%edi)   \n\t"
+                        "movq %%mm4, 16(%2)      \n\t"
                         "psrlq $32, %%mm0        \n\t" // z z z z 0 2 1 0
-                        "movq %%mm3, 8(%%edi)    \n\t"
+                        "movq %%mm3, 8(%2)       \n\t"
                         "punpckldq %%mm4, %%mm0  \n\t" // 1 0 2 1 0 2 1 0
-                        "subl $3, %%esi          \n\t"
-                        "movq %%mm0, (%%edi)     \n\t"
-                        "subl $24, %%edi         \n\t"
+                        "sub  $3, %1             \n\t"
+                        "movq %%mm0, (%2)        \n\t"
+                        "sub  $24, %2            \n\t"
                         "decl %%ecx              \n\t"
                         "jnz .loop3_pass0        \n\t"
                         "EMMS                    \n\t" // DONE
@@ -1775,162 +2435,308 @@
                           "=D" (dummy_value_D),
                           "=a" (dummy_value_a)
 
+                        : "0" (width),         // ecx  // input regs
+                          "1" (sptr),          // esi/rsi
+                          "2" (dp),            // edi/rdi
+#if defined(PNG_x86_64_USE_GOTPCREL)           // formerly _const4:
+                          "3" (&_c64._amask5_3_0)  // (0x0000000000FFFFFFLL)
+#else
+                          "3" (&_amask5_3_0)   // eax (0x0000000000FFFFFFLL)
+#endif
 
-                        : "1" (sptr),      // esi      // input regs
-                          "2" (dp),        // edi
-                          "0" (width),     // ecx
-                          "3" (&_const4)  // %1(?)  (0x0000000000FFFFFFLL)
-
-#if 0  /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                         : "%mm0", "%mm1", "%mm2"       // clobber list
                         , "%mm3", "%mm4"
 #endif
                      );
                   }
-                  else if (((pass == 2) || (pass == 3)) && width)
+               } /* end of pixel_bytes == 3 */
+
+               //--------------------------------------------------------------
+               else if (pixel_bytes == BPP4)
+               {
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
-                     int dummy_value_c;   // fix 'forbidden register spilled'
-                     int dummy_value_S;
-                     int dummy_value_D;
-                     int dummy_value_a;
-
-                     __asm__ __volatile__ (
-                        "subl $9, %%edi          \n\t"
-                                     // (png_pass_inc[pass] - 1)*pixel_bytes
-
-                     ".loop3_pass2:              \n\t"
-                        "movd (%%esi), %%mm0     \n\t" // x x x x x 2 1 0
-                        "pand (%3), %%mm0     \n\t" // z z z z z 2 1 0
-                        "movq %%mm0, %%mm1       \n\t" // z z z z z 2 1 0
-                        "psllq $16, %%mm0        \n\t" // z z z 2 1 0 z z
-                        "movq %%mm0, %%mm2       \n\t" // z z z 2 1 0 z z
-                        "psllq $24, %%mm0        \n\t" // 2 1 0 z z z z z
-                        "psrlq $8, %%mm1         \n\t" // z z z z z z 2 1
-                        "por %%mm2, %%mm0        \n\t" // 2 1 0 2 1 0 z z
-                        "por %%mm1, %%mm0        \n\t" // 2 1 0 2 1 0 2 1
-                        "movq %%mm0, 4(%%edi)    \n\t"
-                        "psrlq $16, %%mm0        \n\t" // z z 2 1 0 2 1 0
-                        "subl $3, %%esi          \n\t"
-                        "movd %%mm0, (%%edi)     \n\t"
-                        "subl $12, %%edi         \n\t"
-                        "decl %%ecx              \n\t"
-                        "jnz .loop3_pass2        \n\t"
-                        "EMMS                    \n\t" // DONE
-
-                        : "=c" (dummy_value_c),        // output regs (dummy)
-                          "=S" (dummy_value_S),
-                          "=D" (dummy_value_D),
-                          "=a" (dummy_value_a)
-
-                        : "1" (sptr),      // esi      // input regs
-                          "2" (dp),        // edi
-                          "0" (width),     // ecx
-                          "3" (&_const4)  // (0x0000000000FFFFFFLL)
-
-#if 0  /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                        : "%mm0", "%mm1", "%mm2"       // clobber list
-#endif
-                     );
-                  }
-                  else if (width) /* && ((pass == 4) || (pass == 5)) */
-                  {
-                     int width_mmx = ((width >> 1) << 1) - 8;   // GRR:  huh?
-                     if (width_mmx < 0)
-                         width_mmx = 0;
-                     width -= width_mmx;        // 8 or 9 pix, 24 or 27 bytes
+                     int width_mmx = ((width >> 1) << 1) ;
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
                      if (width_mmx)
                      {
-                        // png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
-                        // sptr points at last pixel in pre-expanded row
-                        // dp points at last pixel position in expanded row
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-                        int dummy_value_a;
-                        int dummy_value_d;
-
                         __asm__ __volatile__ (
-                           "subl $3, %%esi          \n\t"
-                           "subl $9, %%edi          \n\t"
-                                        // (png_pass_inc[pass] + 1)*pixel_bytes
+                           "sub  $4, %1             \n\t"
+                           "sub  $12, %2            \n\t"
 
-                        ".loop3_pass4:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // x x 5 4 3 2 1 0
-                           "movq %%mm0, %%mm1       \n\t" // x x 5 4 3 2 1 0
-                           "movq %%mm0, %%mm2       \n\t" // x x 5 4 3 2 1 0
-                           "psllq $24, %%mm0        \n\t" // 4 3 2 1 0 z z z
-                           "pand (%3), %%mm1          \n\t" // z z z z z 2 1 0
-                           "psrlq $24, %%mm2        \n\t" // z z z x x 5 4 3
-                           "por %%mm1, %%mm0        \n\t" // 4 3 2 1 0 2 1 0
-                           "movq %%mm2, %%mm3       \n\t" // z z z x x 5 4 3
-                           "psllq $8, %%mm2         \n\t" // z z x x 5 4 3 z
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "psrlq $16, %%mm3        \n\t" // z z z z z x x 5
-                           "pand (%4), %%mm3     \n\t" // z z z z z z z 5
-                           "por %%mm3, %%mm2        \n\t" // z z x x 5 4 3 5
-                           "subl $6, %%esi          \n\t"
-                           "movd %%mm2, 8(%%edi)    \n\t"
-                           "subl $12, %%edi         \n\t"
+                        ".loop4_pass4:              \n\t"
+                           "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%2)        \n\t"
+                           "sub  $8, %1             \n\t"
+                           "movq %%mm1, 8(%2)       \n\t"
+                           "sub  $16, %2            \n\t"
                            "subl $2, %%ecx          \n\t"
-                           "jnz .loop3_pass4        \n\t"
+                           "jnz .loop4_pass4        \n\t"
                            "EMMS                    \n\t" // DONE
 
                            : "=c" (dummy_value_c),        // output regs (dummy)
                              "=S" (dummy_value_S),
-                             "=D" (dummy_value_D),
-                             "=a" (dummy_value_a),
-                             "=d" (dummy_value_d)
+                             "=D" (dummy_value_D)
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx), // ecx
-                             "3" (&_const4), // 0x0000000000FFFFFFLL
-                             "4" (&_const6)  // 0x00000000000000FFLL
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)  /* guards the MMX clobber list */
                            : "%mm0", "%mm1"               // clobber list
-                           , "%mm2", "%mm3"
 #endif
                         );
                      }
 
-                     sptr -= width_mmx*3;
-                     dp -= width_mmx*6;
+                     sptr -= (width_mmx*BPP4 - BPP4); // sign fixed
+                     dp -= (width_mmx*2*BPP4 - BPP4); // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
                         int j;
-
-                        png_memcpy(v, sptr, 3);
+                        sptr -= BPP4;
+                        png_memcpy(v, sptr, BPP4);
                         for (j = 0; j < png_pass_inc[pass]; j++)
                         {
-                           png_memcpy(dp, v, 3);
-                           dp -= 3;
+                           dp -= BPP4;
+                           png_memcpy(dp, v, BPP4);
                         }
-                        sptr -= 3;
                      }
                   }
-               } /* end of pixel_bytes == 3 */
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {  // 4-byte pixels; MMX path stores each pixel 4x
+                     int width_mmx = ((width >> 1) << 1);  // even part
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ __volatile__ (
+                           "sub  $4, %1             \n\t"
+                           "sub  $28, %2            \n\t"
+                           // backward loop: 2 src pixels in, 32 dst bytes out
+                        ".loop4_pass2:              \n\t"
+                           "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%2)        \n\t"
+                           "movq %%mm0, 8(%2)       \n\t"
+                           "movq %%mm1, 16(%2)      \n\t"
+                           "movq %%mm1, 24(%2)      \n\t"
+                           "sub  $8, %1             \n\t"
+                           "sub  $32, %2            \n\t"
+                           "subl $2, %%ecx          \n\t" // 2 pixels done
+                           "jnz .loop4_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           : "=c" (dummy_value_c),        // output regs (dummy)
+                             "=S" (dummy_value_S),
+                             "=D" (dummy_value_D)
+
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
+#endif
+                        );
+                     }
+                     // asm left sptr/dp as-is: step over pixels it handled
+                     sptr -= (width_mmx*4 - 4); // sign fixed
+                     dp -= (width_mmx*16 - 4);  // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 4;
+                        png_memcpy(v, sptr, 4);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 4;
+                           png_memcpy(dp, v, 4);
+                        }
+                     }
+                  }
+                  else if (width)  // && ((pass == 0) || (pass == 1))
+                  {  // 4-byte pixels; MMX path stores each pixel 8x
+                     int width_mmx = ((width >> 1) << 1);  // even part
+                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ __volatile__ (
+                           "sub  $4, %1             \n\t"
+                           "sub  $60, %2            \n\t"
+                           // backward loop: 2 src pixels in, 64 dst bytes out
+                        ".loop4_pass0:              \n\t"
+                           "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
+                           "movq %%mm0, (%2)        \n\t"
+                           "movq %%mm0, 8(%2)       \n\t"
+                           "movq %%mm0, 16(%2)      \n\t"
+                           "movq %%mm0, 24(%2)      \n\t"
+                           "movq %%mm1, 32(%2)      \n\t"
+                           "movq %%mm1, 40(%2)      \n\t"
+                           "movq %%mm1, 48(%2)      \n\t"
+                           "sub  $8, %1             \n\t"
+                           "movq %%mm1, 56(%2)      \n\t"
+                           "sub  $64, %2            \n\t"
+                           "subl $2, %%ecx          \n\t" // 2 pixels done
+                           "jnz .loop4_pass0        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           : "=c" (dummy_value_c),        // output regs (dummy)
+                             "=S" (dummy_value_S),
+                             "=D" (dummy_value_D)
+
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
+#endif
+                        );
+                     }
+                     // asm left sptr/dp as-is: step over pixels it handled
+                     sptr -= (width_mmx*4 - 4); // sign fixed
+                     dp -= (width_mmx*32 - 4);  // sign fixed
+                     for (i = width; i; i--)
+                     {
+                        png_byte v[8];
+                        int j;
+                        sptr -= 4;
+                        png_memcpy(v, sptr, 4);
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           dp -= 4;
+                           png_memcpy(dp, v, 4);
+                        }
+                     }
+                  }
+               } /* end of pixel_bytes == 4 */
 
                //--------------------------------------------------------------
                else if (pixel_bytes == 1)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
+                  {  // 1-byte pixels; MMX path stores each pixel 2x
+                     int width_mmx = ((width >> 3) << 3);  // multiple of 8
+                     width -= width_mmx;        // 0-7 pixels => 0-7 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ __volatile__ (
+                           "sub  $7, %1             \n\t"
+                           "sub  $15, %2            \n\t"
+                           // backward loop: 8 src pixels in, 16 dst bytes out
+                        ".loop1_pass4:              \n\t"
+                           "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
+                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
+                           "punpckhbw %%mm1, %%mm1  \n\t" // 7 7 6 6 5 5 4 4
+                           "movq %%mm1, 8(%2)       \n\t"
+                           "sub  $8, %1             \n\t"
+                           "movq %%mm0, (%2)        \n\t"
+                           "sub  $16, %2            \n\t"
+                           "subl $8, %%ecx          \n\t" // 8 pixels done
+                           "jnz .loop1_pass4        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           : "=c" (dummy_value_c),        // output regs (dummy)
+                             "=S" (dummy_value_S),
+                             "=D" (dummy_value_D)
+
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
+#endif
+                        );
+                     }
+                     // asm left sptr/dp as-is: step over pixels it handled
+                     sptr -= width_mmx;
+                     dp -= width_mmx*2;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           *dp-- = *sptr;
+                        }
+                        --sptr;
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
                   {
                      int width_mmx = ((width >> 2) << 2);
                      width -= width_mmx;        // 0-3 pixels => 0-3 bytes
                      if (width_mmx)
                      {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
                         __asm__ __volatile__ (
-                           "subl $3, %%esi          \n\t"
-                           "subl $31, %%edi         \n\t"
+                           "sub  $3, %1             \n\t"
+                           "sub  $15, %2            \n\t"
+
+                        ".loop1_pass2:              \n\t"
+                           "movd (%1), %%mm0        \n\t" // x x x x 3 2 1 0
+                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
+                           "movq %%mm0, %%mm1       \n\t" // 3 3 2 2 1 1 0 0
+                           "punpcklwd %%mm0, %%mm0  \n\t" // 1 1 1 1 0 0 0 0
+                           "punpckhwd %%mm1, %%mm1  \n\t" // 3 3 3 3 2 2 2 2
+                           "movq %%mm0, (%2)        \n\t"
+                           "sub  $4, %1             \n\t"
+                           "movq %%mm1, 8(%2)       \n\t"
+                           "sub  $16, %2            \n\t"
+                           "subl $4, %%ecx          \n\t"
+                           "jnz .loop1_pass2        \n\t"
+                           "EMMS                    \n\t" // DONE
+
+                           : "=c" (dummy_value_c),        // output regs (dummy)
+                             "=S" (dummy_value_S),
+                             "=D" (dummy_value_D)
+
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
+
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
+#endif
+                        );
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*4;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           *dp-- = *sptr;
+                        }
+                        --sptr;
+                     }
+                  }
+                  else if (width)  // && ((pass == 0) || (pass == 1))
+                  {
+                     int width_mmx = ((width >> 2) << 2);
+                     width -= width_mmx;        // 0-3 pixels => 0-3 bytes
+                     if (width_mmx)
+                     {
+                        __asm__ __volatile__ (
+                           "sub  $3, %1             \n\t"
+                           "sub  $31, %2            \n\t"
 
                         ".loop1_pass0:              \n\t"
-                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "movd (%1), %%mm0        \n\t" // x x x x 3 2 1 0
                            "movq %%mm0, %%mm1       \n\t" // x x x x 3 2 1 0
                            "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
                            "movq %%mm0, %%mm2       \n\t" // 3 3 2 2 1 1 0 0
@@ -1938,16 +2744,16 @@
                            "movq %%mm0, %%mm3       \n\t" // 1 1 1 1 0 0 0 0
                            "punpckldq %%mm0, %%mm0  \n\t" // 0 0 0 0 0 0 0 0
                            "punpckhdq %%mm3, %%mm3  \n\t" // 1 1 1 1 1 1 1 1
-                           "movq %%mm0, (%%edi)     \n\t"
+                           "movq %%mm0, (%2)        \n\t"
                            "punpckhwd %%mm2, %%mm2  \n\t" // 3 3 3 3 2 2 2 2
-                           "movq %%mm3, 8(%%edi)    \n\t"
+                           "movq %%mm3, 8(%2)       \n\t"
                            "movq %%mm2, %%mm4       \n\t" // 3 3 3 3 2 2 2 2
                            "punpckldq %%mm2, %%mm2  \n\t" // 2 2 2 2 2 2 2 2
                            "punpckhdq %%mm4, %%mm4  \n\t" // 3 3 3 3 3 3 3 3
-                           "movq %%mm2, 16(%%edi)   \n\t"
-                           "subl $4, %%esi          \n\t"
-                           "movq %%mm4, 24(%%edi)   \n\t"
-                           "subl $32, %%edi         \n\t"
+                           "movq %%mm2, 16(%2)      \n\t"
+                           "sub  $4, %1             \n\t"
+                           "movq %%mm4, 24(%2)      \n\t"
+                           "sub  $32, %2            \n\t"
                            "subl $4, %%ecx          \n\t"
                            "jnz .loop1_pass0        \n\t"
                            "EMMS                    \n\t" // DONE
@@ -1956,11 +2762,11 @@
                              "=S" (dummy_value_S),
                              "=D" (dummy_value_D)
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                            : "%mm0", "%mm1", "%mm2"       // clobber list
                            , "%mm3", "%mm4"
 #endif
@@ -1998,176 +2804,57 @@
                         --sptr;
                      }
                   }
-                  else if (((pass == 2) || (pass == 3)) && width)
-                  {
-                     int width_mmx = ((width >> 2) << 2);
-                     width -= width_mmx;        // 0-3 pixels => 0-3 bytes
-                     if (width_mmx)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
-                        __asm__ __volatile__ (
-                           "subl $3, %%esi          \n\t"
-                           "subl $15, %%edi         \n\t"
-
-                        ".loop1_pass2:              \n\t"
-                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
-                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
-                           "movq %%mm0, %%mm1       \n\t" // 3 3 2 2 1 1 0 0
-                           "punpcklwd %%mm0, %%mm0  \n\t" // 1 1 1 1 0 0 0 0
-                           "punpckhwd %%mm1, %%mm1  \n\t" // 3 3 3 3 2 2 2 2
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $4, %%esi          \n\t"
-                           "movq %%mm1, 8(%%edi)    \n\t"
-                           "subl $16, %%edi         \n\t"
-                           "subl $4, %%ecx          \n\t"
-                           "jnz .loop1_pass2        \n\t"
-                           "EMMS                    \n\t" // DONE
-
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
-
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
-
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
-#endif
-                        );
-                     }
-
-                     sptr -= width_mmx;
-                     dp -= width_mmx*4;
-                     for (i = width; i; i--)
-                     {
-                        int j;
-
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           *dp-- = *sptr;
-                        }
-                        --sptr;
-                     }
-                  }
-                  else if (width)  /* && ((pass == 4) || (pass == 5)) */
-                  {
-                     int width_mmx = ((width >> 3) << 3);
-                     width -= width_mmx;        // 0-3 pixels => 0-3 bytes
-                     if (width_mmx)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
-                        __asm__ __volatile__ (
-                           "subl $7, %%esi          \n\t"
-                           "subl $15, %%edi         \n\t"
-
-                        ".loop1_pass4:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
-                           "punpcklbw %%mm0, %%mm0  \n\t" // 3 3 2 2 1 1 0 0
-                           "punpckhbw %%mm1, %%mm1  \n\t" // 7 7 6 6 5 5 4 4
-                           "movq %%mm1, 8(%%edi)    \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $16, %%edi         \n\t"
-                           "subl $8, %%ecx          \n\t"
-                           "jnz .loop1_pass4        \n\t"
-                           "EMMS                    \n\t" // DONE
-
-                           : "=c" (dummy_value_c),        // output regs (none)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
-
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
-
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
-#endif
-                        );
-                     }
-
-                     sptr -= width_mmx;
-                     dp -= width_mmx*2;
-                     for (i = width; i; i--)
-                     {
-                        int j;
-
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           *dp-- = *sptr;
-                        }
-                        --sptr;
-                     }
-                  }
                } /* end of pixel_bytes == 1 */
 
                //--------------------------------------------------------------
-               else if (pixel_bytes == 2)
+               else if (pixel_bytes == BPP2)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
-                     int width_mmx = ((width >> 1) << 1);
+                     int width_mmx = ((width >> 1) << 1) ;
                      width -= width_mmx;        // 0,1 pixels => 0,2 bytes
                      if (width_mmx)
                      {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
                         __asm__ __volatile__ (
-                           "subl $2, %%esi          \n\t"
-                           "subl $30, %%edi         \n\t"
+                           "sub  $2, %1             \n\t"
+                           "sub  $6, %2             \n\t"
 
-                        ".loop2_pass0:              \n\t"
-                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                        ".loop2_pass4:              \n\t"
+                           "movd (%1), %%mm0        \n\t" // x x x x 3 2 1 0
                            "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
-                           "movq %%mm0, %%mm1       \n\t" // 3 2 3 2 1 0 1 0
-                           "punpckldq %%mm0, %%mm0  \n\t" // 1 0 1 0 1 0 1 0
-                           "punpckhdq %%mm1, %%mm1  \n\t" // 3 2 3 2 3 2 3 2
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "movq %%mm0, 8(%%edi)    \n\t"
-                           "movq %%mm1, 16(%%edi)   \n\t"
-                           "subl $4, %%esi          \n\t"
-                           "movq %%mm1, 24(%%edi)   \n\t"
-                           "subl $32, %%edi         \n\t"
+                           "sub  $4, %1             \n\t"
+                           "movq %%mm0, (%2)        \n\t"
+                           "sub  $8, %2             \n\t"
                            "subl $2, %%ecx          \n\t"
-                           "jnz .loop2_pass0        \n\t"
+                           "jnz .loop2_pass4        \n\t"
                            "EMMS                    \n\t" // DONE
 
                            : "=c" (dummy_value_c),        // output regs (dummy)
                              "=S" (dummy_value_S),
                              "=D" (dummy_value_D)
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0"                       // clobber list
 #endif
                         );
                      }
 
-                     sptr -= (width_mmx*2 - 2); // sign fixed
-                     dp -= (width_mmx*16 - 2);  // sign fixed
+                     sptr -= (width_mmx*BPP2 - BPP2); // sign fixed
+                     dp -= (width_mmx*2*BPP2 - BPP2); // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
                         int j;
-                        sptr -= 2;
-                        png_memcpy(v, sptr, 2);
+                        sptr -= BPP2;
+                        png_memcpy(v, sptr, BPP2);
                         for (j = 0; j < png_pass_inc[pass]; j++)
                         {
-                           dp -= 2;
-                           png_memcpy(dp, v, 2);
+                           dp -= BPP2;
+                           png_memcpy(dp, v, BPP2);
                         }
                      }
                   }
@@ -2177,24 +2864,20 @@
                      width -= width_mmx;        // 0,1 pixels => 0,2 bytes
                      if (width_mmx)
                      {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
                         __asm__ __volatile__ (
-                           "subl $2, %%esi          \n\t"
-                           "subl $14, %%edi         \n\t"
+                           "sub  $2, %1             \n\t"
+                           "sub  $14, %2            \n\t"
 
                         ".loop2_pass2:              \n\t"
-                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                           "movd (%1), %%mm0        \n\t" // x x x x 3 2 1 0
                            "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
                            "movq %%mm0, %%mm1       \n\t" // 3 2 3 2 1 0 1 0
                            "punpckldq %%mm0, %%mm0  \n\t" // 1 0 1 0 1 0 1 0
                            "punpckhdq %%mm1, %%mm1  \n\t" // 3 2 3 2 3 2 3 2
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $4, %%esi          \n\t"
-                           "movq %%mm1, 8(%%edi)    \n\t"
-                           "subl $16, %%edi         \n\t"
+                           "movq %%mm0, (%2)        \n\t"
+                           "sub  $4, %1             \n\t"
+                           "movq %%mm1, 8(%2)       \n\t"
+                           "sub  $16, %2            \n\t"
                            "subl $2, %%ecx          \n\t"
                            "jnz .loop2_pass2        \n\t"
                            "EMMS                    \n\t" // DONE
@@ -2203,11 +2886,11 @@
                              "=S" (dummy_value_S),
                              "=D" (dummy_value_D)
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                            : "%mm0", "%mm1"               // clobber list
 #endif
                         );
@@ -2228,46 +2911,48 @@
                         }
                      }
                   }
-                  else if (width)  // pass == 4 or 5
+                  else if (width)  // && ((pass == 0) || (pass == 1))
                   {
-                     int width_mmx = ((width >> 1) << 1) ;
+                     int width_mmx = ((width >> 1) << 1);
                      width -= width_mmx;        // 0,1 pixels => 0,2 bytes
                      if (width_mmx)
                      {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
                         __asm__ __volatile__ (
-                           "subl $2, %%esi          \n\t"
-                           "subl $6, %%edi          \n\t"
+                           "sub  $2, %1             \n\t"
+                           "sub  $30, %2            \n\t"
 
-                        ".loop2_pass4:              \n\t"
-                           "movd (%%esi), %%mm0     \n\t" // x x x x 3 2 1 0
+                        ".loop2_pass0:              \n\t"
+                           "movd (%1), %%mm0        \n\t" // x x x x 3 2 1 0
                            "punpcklwd %%mm0, %%mm0  \n\t" // 3 2 3 2 1 0 1 0
-                           "subl $4, %%esi          \n\t"
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $8, %%edi          \n\t"
+                           "movq %%mm0, %%mm1       \n\t" // 3 2 3 2 1 0 1 0
+                           "punpckldq %%mm0, %%mm0  \n\t" // 1 0 1 0 1 0 1 0
+                           "punpckhdq %%mm1, %%mm1  \n\t" // 3 2 3 2 3 2 3 2
+                           "movq %%mm0, (%2)        \n\t"
+                           "movq %%mm0, 8(%2)       \n\t"
+                           "movq %%mm1, 16(%2)      \n\t"
+                           "sub  $4, %1             \n\t"
+                           "movq %%mm1, 24(%2)      \n\t"
+                           "sub  $32, %2            \n\t"
                            "subl $2, %%ecx          \n\t"
-                           "jnz .loop2_pass4        \n\t"
+                           "jnz .loop2_pass0        \n\t"
                            "EMMS                    \n\t" // DONE
 
                            : "=c" (dummy_value_c),        // output regs (dummy)
                              "=S" (dummy_value_S),
                              "=D" (dummy_value_D)
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
+                           : "0" (width_mmx),     // ecx  // input regs
+                             "1" (sptr),          // esi/rsi
+                             "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0"                       // clobber list
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                           : "%mm0", "%mm1"               // clobber list
 #endif
                         );
                      }
 
                      sptr -= (width_mmx*2 - 2); // sign fixed
-                     dp -= (width_mmx*4 - 2);   // sign fixed
+                     dp -= (width_mmx*16 - 2);  // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
@@ -2284,227 +2969,36 @@
                } /* end of pixel_bytes == 2 */
 
                //--------------------------------------------------------------
-               else if (pixel_bytes == 4)
-               {
-                  if (((pass == 0) || (pass == 1)) && width)
-                  {
-                     int width_mmx = ((width >> 1) << 1);
-                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
-                     if (width_mmx)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
-                        __asm__ __volatile__ (
-                           "subl $4, %%esi          \n\t"
-                           "subl $60, %%edi         \n\t"
-
-                        ".loop4_pass0:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
-                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
-                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "movq %%mm0, 8(%%edi)    \n\t"
-                           "movq %%mm0, 16(%%edi)   \n\t"
-                           "movq %%mm0, 24(%%edi)   \n\t"
-                           "movq %%mm1, 32(%%edi)   \n\t"
-                           "movq %%mm1, 40(%%edi)   \n\t"
-                           "movq %%mm1, 48(%%edi)   \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "movq %%mm1, 56(%%edi)   \n\t"
-                           "subl $64, %%edi         \n\t"
-                           "subl $2, %%ecx          \n\t"
-                           "jnz .loop4_pass0        \n\t"
-                           "EMMS                    \n\t" // DONE
-
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
-
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
-
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
-#endif
-                        );
-                     }
-
-                     sptr -= (width_mmx*4 - 4); // sign fixed
-                     dp -= (width_mmx*32 - 4);  // sign fixed
-                     for (i = width; i; i--)
-                     {
-                        png_byte v[8];
-                        int j;
-                        sptr -= 4;
-                        png_memcpy(v, sptr, 4);
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           dp -= 4;
-                           png_memcpy(dp, v, 4);
-                        }
-                     }
-                  }
-                  else if (((pass == 2) || (pass == 3)) && width)
-                  {
-                     int width_mmx = ((width >> 1) << 1);
-                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
-                     if (width_mmx)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
-                        __asm__ __volatile__ (
-                           "subl $4, %%esi          \n\t"
-                           "subl $28, %%edi         \n\t"
-
-                        ".loop4_pass2:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
-                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
-                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "movq %%mm0, 8(%%edi)    \n\t"
-                           "movq %%mm1, 16(%%edi)   \n\t"
-                           "movq %%mm1, 24(%%edi)   \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "subl $32, %%edi         \n\t"
-                           "subl $2, %%ecx          \n\t"
-                           "jnz .loop4_pass2        \n\t"
-                           "EMMS                    \n\t" // DONE
-
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
-
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
-
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
-#endif
-                        );
-                     }
-
-                     sptr -= (width_mmx*4 - 4); // sign fixed
-                     dp -= (width_mmx*16 - 4);  // sign fixed
-                     for (i = width; i; i--)
-                     {
-                        png_byte v[8];
-                        int j;
-                        sptr -= 4;
-                        png_memcpy(v, sptr, 4);
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           dp -= 4;
-                           png_memcpy(dp, v, 4);
-                        }
-                     }
-                  }
-                  else if (width)  // pass == 4 or 5
-                  {
-                     int width_mmx = ((width >> 1) << 1) ;
-                     width -= width_mmx;        // 0,1 pixels => 0,4 bytes
-                     if (width_mmx)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
-
-                        __asm__ __volatile__ (
-                           "subl $4, %%esi          \n\t"
-                           "subl $12, %%edi         \n\t"
-
-                        ".loop4_pass4:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, %%mm1       \n\t" // 7 6 5 4 3 2 1 0
-                           "punpckldq %%mm0, %%mm0  \n\t" // 3 2 1 0 3 2 1 0
-                           "punpckhdq %%mm1, %%mm1  \n\t" // 7 6 5 4 7 6 5 4
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "movq %%mm1, 8(%%edi)    \n\t"
-                           "subl $16, %%edi         \n\t"
-                           "subl $2, %%ecx          \n\t"
-                           "jnz .loop4_pass4        \n\t"
-                           "EMMS                    \n\t" // DONE
-
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
-
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width_mmx)  // ecx
-
-#if 0  /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0", "%mm1"               // clobber list
-#endif
-                        );
-                     }
-
-                     sptr -= (width_mmx*4 - 4); // sign fixed
-                     dp -= (width_mmx*8 - 4);   // sign fixed
-                     for (i = width; i; i--)
-                     {
-                        png_byte v[8];
-                        int j;
-                        sptr -= 4;
-                        png_memcpy(v, sptr, 4);
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           dp -= 4;
-                           png_memcpy(dp, v, 4);
-                        }
-                     }
-                  }
-               } /* end of pixel_bytes == 4 */
-
-               //--------------------------------------------------------------
-               else if (pixel_bytes == 8)
+               else if (pixel_bytes == BPP8)
                {
 // GRR TEST:  should work, but needs testing (special 64-bit version of rpng2?)
                   // GRR NOTE:  no need to combine passes here!
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
-                     int dummy_value_c;  // fix 'forbidden register spilled'
-                     int dummy_value_S;
-                     int dummy_value_D;
-
                      // source is 8-byte RRGGBBAA
-                     // dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ...
+                     // dest is 16-byte RRGGBBAA RRGGBBAA
                      __asm__ __volatile__ (
-                        "subl $56, %%edi         \n\t" // start of last block
+                        "sub  $8, %2             \n\t" // start of last block
 
-                     ".loop8_pass0:              \n\t"
-                        "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                        "movq %%mm0, (%%edi)     \n\t"
-                        "movq %%mm0, 8(%%edi)    \n\t"
-                        "movq %%mm0, 16(%%edi)   \n\t"
-                        "movq %%mm0, 24(%%edi)   \n\t"
-                        "movq %%mm0, 32(%%edi)   \n\t"
-                        "movq %%mm0, 40(%%edi)   \n\t"
-                        "movq %%mm0, 48(%%edi)   \n\t"
-                        "subl $8, %%esi          \n\t"
-                        "movq %%mm0, 56(%%edi)   \n\t"
-                        "subl $64, %%edi         \n\t"
+                     ".loop8_pass4:              \n\t"
+                        "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                        "movq %%mm0, (%2)        \n\t"
+                        "sub  $8, %1             \n\t"
+                        "movq %%mm0, 8(%2)       \n\t"
+                        "sub  $16, %2            \n\t"
                         "decl %%ecx              \n\t"
-                        "jnz .loop8_pass0        \n\t"
+                        "jnz .loop8_pass4        \n\t"
                         "EMMS                    \n\t" // DONE
 
                         : "=c" (dummy_value_c),        // output regs (dummy)
                           "=S" (dummy_value_S),
                           "=D" (dummy_value_D)
 
-                        : "1" (sptr),      // esi      // input regs
-                          "2" (dp),        // edi
-                          "0" (width)      // ecx
+                        : "0" (width),         // ecx  // input regs
+                          "1" (sptr),          // esi/rsi
+                          "2" (dp)             // edi/rdi
 
-#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
                         : "%mm0"                       // clobber list
 #endif
                      );
@@ -2515,122 +3009,143 @@
                      // dest is 32-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA
                      // (recall that expansion is _in place_:  sptr and dp
                      //  both point at locations within same row buffer)
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
+                     __asm__ __volatile__ (
+                        "sub  $24, %2            \n\t" // start of last block
 
-                        __asm__ __volatile__ (
-                           "subl $24, %%edi         \n\t" // start of last block
+                     ".loop8_pass2:              \n\t"
+                        "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                        "movq %%mm0, (%2)        \n\t"
+                        "movq %%mm0, 8(%2)       \n\t"
+                        "movq %%mm0, 16(%2)      \n\t"
+                        "sub  $8, %1             \n\t"
+                        "movq %%mm0, 24(%2)      \n\t"
+                        "sub  $32, %2            \n\t"
+                        "decl %%ecx              \n\t"
+                        "jnz .loop8_pass2        \n\t"
+                        "EMMS                    \n\t" // DONE
 
-                        ".loop8_pass2:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "movq %%mm0, 8(%%edi)    \n\t"
-                           "movq %%mm0, 16(%%edi)   \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "movq %%mm0, 24(%%edi)   \n\t"
-                           "subl $32, %%edi         \n\t"
-                           "decl %%ecx              \n\t"
-                           "jnz .loop8_pass2        \n\t"
-                           "EMMS                    \n\t" // DONE
+                        : "=c" (dummy_value_c),        // output regs (dummy)
+                          "=S" (dummy_value_S),
+                          "=D" (dummy_value_D)
 
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
+                        : "0" (width),         // ecx  // input regs
+                          "1" (sptr),          // esi/rsi
+                          "2" (dp)             // edi/rdi
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width)      // ecx
-
-#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0"                       // clobber list
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                        : "%mm0"                       // clobber list
 #endif
-                        );
-                     }
+                     );
                   }
-                  else if (width)  // pass == 4 or 5
+                  else if (width)  // && ((pass == 0) || (pass == 1))
                   {
                      // source is 8-byte RRGGBBAA
-                     // dest is 16-byte RRGGBBAA RRGGBBAA
-                     {
-                        int dummy_value_c;  // fix 'forbidden register spilled'
-                        int dummy_value_S;
-                        int dummy_value_D;
+                     // dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ...
+                     __asm__ __volatile__ (
+                        "sub  $56, %2            \n\t" // start of last block
 
-                        __asm__ __volatile__ (
-                           "subl $8, %%edi          \n\t" // start of last block
+                     ".loop8_pass0:              \n\t"
+                        "movq (%1), %%mm0        \n\t" // 7 6 5 4 3 2 1 0
+                        "movq %%mm0, (%2)        \n\t"
+                        "movq %%mm0, 8(%2)       \n\t"
+                        "movq %%mm0, 16(%2)      \n\t"
+                        "movq %%mm0, 24(%2)      \n\t"
+                        "movq %%mm0, 32(%2)      \n\t"
+                        "movq %%mm0, 40(%2)      \n\t"
+                        "movq %%mm0, 48(%2)      \n\t"
+                        "sub  $8, %1             \n\t"
+                        "movq %%mm0, 56(%2)      \n\t"
+                        "sub  $64, %2            \n\t"
+                        "decl %%ecx              \n\t"
+                        "jnz .loop8_pass0        \n\t"
+                        "EMMS                    \n\t" // DONE
 
-                        ".loop8_pass4:              \n\t"
-                           "movq (%%esi), %%mm0     \n\t" // 7 6 5 4 3 2 1 0
-                           "movq %%mm0, (%%edi)     \n\t"
-                           "subl $8, %%esi          \n\t"
-                           "movq %%mm0, 8(%%edi)    \n\t"
-                           "subl $16, %%edi         \n\t"
-                           "decl %%ecx              \n\t"
-                           "jnz .loop8_pass4        \n\t"
-                           "EMMS                    \n\t" // DONE
+                        : "=c" (dummy_value_c),        // output regs (dummy)
+                          "=S" (dummy_value_S),
+                          "=D" (dummy_value_D)
 
-                           : "=c" (dummy_value_c),        // output regs (dummy)
-                             "=S" (dummy_value_S),
-                             "=D" (dummy_value_D)
+                        : "0" (width),         // ecx  // input regs
+                          "1" (sptr),          // esi/rsi
+                          "2" (dp)             // edi/rdi
 
-                           : "1" (sptr),      // esi      // input regs
-                             "2" (dp),        // edi
-                             "0" (width)      // ecx
-
-#if 0  /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */
-                           : "%mm0"                       // clobber list
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
+                        : "%mm0"                       // clobber list
 #endif
-                        );
-                     }
+                     );
                   }
-
                } /* end of pixel_bytes == 8 */
 
                //--------------------------------------------------------------
-               else if (pixel_bytes == 6)
+               else if (pixel_bytes == BPP6)   // why no MMX for this case?
                {
                   for (i = width; i; i--)
                   {
                      png_byte v[8];
                      int j;
-                     png_memcpy(v, sptr, 6);
+                     png_memcpy(v, sptr, BPP6);
                      for (j = 0; j < png_pass_inc[pass]; j++)
                      {
-                        png_memcpy(dp, v, 6);
-                        dp -= 6;
+                        png_memcpy(dp, v, BPP6);
+                        dp -= BPP6;
                      }
-                     sptr -= 6;
+                     sptr -= BPP6;
                   }
                } /* end of pixel_bytes == 6 */
 
                //--------------------------------------------------------------
                else
                {
-                  for (i = width; i; i--)
-                  {
-                     png_byte v[8];
-                     int j;
-                     png_memcpy(v, sptr, pixel_bytes);
-                     for (j = 0; j < png_pass_inc[pass]; j++)
-                     {
-                        png_memcpy(dp, v, pixel_bytes);
-                        dp -= pixel_bytes;
-                     }
-                     sptr-= pixel_bytes;
-                  }
+                  // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+                  png_debug(1, "Internal libpng logic error (GCC "
+                    "png_do_read_interlace() _mmx_supported)\n");
+#endif
                }
+
             } // end of _mmx_supported ========================================
 
             else /* MMX not supported:  use modified C code - takes advantage
                   *   of inlining of png_memcpy for a constant */
-                 /* GRR 19991007:  does it?  or should pixel_bytes in each
-                  *   block be replaced with immediate value (e.g., 1)? */
-                 /* GRR 19991017:  replaced with constants in each case */
-#endif /* PNG_MMX_CODE_SUPPORTED */
             {
-               if (pixel_bytes == 1)
+               if (pixel_bytes == BPP3)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, BPP3);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, BPP3);
+                        dp -= BPP3;
+                     }
+                     sptr -= BPP3;
+                  }
+               }
+               else if (pixel_bytes == BPP4)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, BPP4);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+#if defined(PNG_DEBUG) && defined(PNG_1_0_X)  // row_buf_size gone in 1.2.x
+                        if (dp < row || dp+3 > row+png_ptr->row_buf_size)
+                        {
+                           printf("dp out of bounds: row=%10p, dp=%10p, "
+                             "rp=%10p\n", row, dp, row+png_ptr->row_buf_size);
+                           printf("row_buf_size=%lu\n", png_ptr->row_buf_size);
+                        }
+#endif
+                        png_memcpy(dp, v, BPP4);
+                        dp -= BPP4;
+                     }
+                     sptr -= BPP4;
+                  }
+               }
+               else if (pixel_bytes == 1)
                {
                   for (i = width; i; i--)
                   {
@@ -2642,108 +3157,63 @@
                      --sptr;
                   }
                }
-               else if (pixel_bytes == 3)
+               else if (pixel_bytes == BPP2)
                {
                   for (i = width; i; i--)
                   {
                      png_byte v[8];
                      int j;
-                     png_memcpy(v, sptr, 3);
+                     png_memcpy(v, sptr, BPP2);
                      for (j = 0; j < png_pass_inc[pass]; j++)
                      {
-                        png_memcpy(dp, v, 3);
-                        dp -= 3;
+                        png_memcpy(dp, v, BPP2);
+                        dp -= BPP2;
                      }
-                     sptr -= 3;
+                     sptr -= BPP2;
                   }
                }
-               else if (pixel_bytes == 2)
+               else if (pixel_bytes == BPP6)
                {
                   for (i = width; i; i--)
                   {
                      png_byte v[8];
                      int j;
-                     png_memcpy(v, sptr, 2);
+                     png_memcpy(v, sptr, BPP6);
                      for (j = 0; j < png_pass_inc[pass]; j++)
                      {
-                        png_memcpy(dp, v, 2);
-                        dp -= 2;
+                        png_memcpy(dp, v, BPP6);
+                        dp -= BPP6;
                      }
-                     sptr -= 2;
+                     sptr -= BPP6;
                   }
                }
-               else if (pixel_bytes == 4)
+               else if (pixel_bytes == BPP8)
                {
                   for (i = width; i; i--)
                   {
                      png_byte v[8];
                      int j;
-                     png_memcpy(v, sptr, 4);
+                     png_memcpy(v, sptr, BPP8);
                      for (j = 0; j < png_pass_inc[pass]; j++)
                      {
-#ifdef PNG_DEBUG
-                        if (dp < row || dp+3 > row+png_ptr->row_buf_size)
-                        {
-                           printf("dp out of bounds: row=%d, dp=%d, rp=%d\n",
-                             row, dp, row+png_ptr->row_buf_size);
-                           printf("row_buf=%d\n",png_ptr->row_buf_size);
-                        }
+                        png_memcpy(dp, v, BPP8);
+                        dp -= BPP8;
+                     }
+                     sptr -= BPP8;
+                  }
+               }
+               else
+               {
+                  // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+                  png_debug(1, "Internal libpng logic error (GCC "
+                    "png_do_read_interlace() !_mmx_supported)\n");
 #endif
-                        png_memcpy(dp, v, 4);
-                        dp -= 4;
-                     }
-                     sptr -= 4;
-                  }
-               }
-               else if (pixel_bytes == 6)
-               {
-                  for (i = width; i; i--)
-                  {
-                     png_byte v[8];
-                     int j;
-                     png_memcpy(v, sptr, 6);
-                     for (j = 0; j < png_pass_inc[pass]; j++)
-                     {
-                        png_memcpy(dp, v, 6);
-                        dp -= 6;
-                     }
-                     sptr -= 6;
-                  }
-               }
-               else if (pixel_bytes == 8)
-               {
-                  for (i = width; i; i--)
-                  {
-                     png_byte v[8];
-                     int j;
-                     png_memcpy(v, sptr, 8);
-                     for (j = 0; j < png_pass_inc[pass]; j++)
-                     {
-                        png_memcpy(dp, v, 8);
-                        dp -= 8;
-                     }
-                     sptr -= 8;
-                  }
-               }
-               else     /* GRR:  should never be reached */
-               {
-                  for (i = width; i; i--)
-                  {
-                     png_byte v[8];
-                     int j;
-                     png_memcpy(v, sptr, pixel_bytes);
-                     for (j = 0; j < png_pass_inc[pass]; j++)
-                     {
-                        png_memcpy(dp, v, pixel_bytes);
-                        dp -= pixel_bytes;
-                     }
-                     sptr -= pixel_bytes;
-                  }
                }
 
             } /* end if (MMX not supported) */
             break;
-         }
+         } /* end default (8-bit or larger) */
       } /* end switch (row_info->pixel_depth) */
 
       row_info->width = final_width;
@@ -2759,19 +3229,8 @@
 
 
 #if defined(PNG_HAVE_MMX_READ_FILTER_ROW)
-#if defined(PNG_MMX_CODE_SUPPORTED)
+#if defined(PNG_MMX_READ_FILTER_AVG_SUPPORTED)
 
-// These variables are utilized in the functions below.  They are declared
-// globally here to ensure alignment on 8-byte boundaries.
-
-union uAll {
-   long long use;
-   double  align;
-} _LBCarryMask = {0x0101010101010101LL},
-  _HBClearMask = {0x7f7f7f7f7f7f7f7fLL},
-  _ActiveMask, _ActiveMask2, _ActiveMaskEnd, _ShiftBpp, _ShiftRem;
-
-#ifdef PNG_THREAD_UNSAFE_OK
 //===========================================================================//
 //                                                                           //
 //           P N G _ R E A D _ F I L T E R _ R O W _ M M X _ A V G           //
@@ -2784,90 +3243,117 @@
 png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row,
                             png_bytep prev_row)
 {
+   unsigned FullLength, MMXLength;  // png_uint_32 is actually 64-bit on x86-64
    int bpp;
+   int dummy_value_a;
    int dummy_value_c;   // fix 'forbidden register 2 (cx) was spilled' error
-   int dummy_value_S;
-   int dummy_value_D;
+   int dummy_value_d;
+   png_bytep dummy_value_S;
+   png_bytep dummy_value_D;
+   int diff; //     __attribute__((used));
 
-   bpp = (row_info->pixel_depth + 7) >> 3;  // get # bytes per pixel
-   _FullLength  = row_info->rowbytes;       // # of bytes to filter
+   bpp = (row_info->pixel_depth + 7) >> 3;  // calc number of bytes per pixel
+   FullLength = row_info->rowbytes;         // number of bytes to filter
 
    __asm__ __volatile__ (
+   "avg_top:                       \n\t"
+      SAVE_GOT_ebx
+      SAVE_r15
+      SAVE_ebp
       // initialize address pointers and offset
-#ifdef __PIC__
-      "pushl %%ebx                 \n\t" // save index to Global Offset Table
-#endif
-//pre "movl row, %%edi             \n\t" // edi:  Avg(x)
+//pre "movl row, %5                \n\t" // edi/rdi:  ptr to Avg(x)
       "xorl %%ebx, %%ebx           \n\t" // ebx:  x
-      "movl %%edi, %%edx           \n\t"
-//pre "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
-//pre "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
-      "subl %%ecx, %%edx           \n\t" // edx:  Raw(x-bpp)
-
-      "xorl %%eax,%%eax            \n\t"
+//pre "movl prev_row, %4           \n\t" // esi/rsi:  ptr to Prior(x)
+      "mov  %5, " PDX "            \n\t" // copy of row ptr...
+//pre "subl bpp, " PDX "           \n\t" // (bpp is preloaded into ecx)
+      "sub  " PCX "," PDX "        \n\t" // edx/rdx:  ptr to Raw(x-bpp)
+//pre "movl FullLength, %%eax      \n\t" // bring in via eax...
+      SAVE_FullLength                    // ...but store for later use
+      "xorl %%eax, %%eax           \n\t"
 
       // Compute the Raw value for the first bpp bytes
       //    Raw(x) = Avg(x) + (Prior(x)/2)
    "avg_rlp:                       \n\t"
-      "movb (%%esi,%%ebx,),%%al    \n\t" // load al with Prior(x)
+      "movb (%4," PBX ",), %%al    \n\t" // load al with Prior(x)
       "incl %%ebx                  \n\t"
       "shrb %%al                   \n\t" // divide by 2
-      "addb -1(%%edi,%%ebx,),%%al  \n\t" // add Avg(x); -1 to offset inc ebx
+      "addb -1(%5," PBX ",), %%al  \n\t" // add Avg(x); -1 to offset inc ebx
 //pre "cmpl bpp, %%ebx             \n\t" // (bpp is preloaded into ecx)
       "cmpl %%ecx, %%ebx           \n\t"
-      "movb %%al,-1(%%edi,%%ebx,)  \n\t" // write Raw(x); -1 to offset inc ebx
+      "movb %%al, -1(%5," PBX ",)  \n\t" // write Raw(x); -1 to offset inc ebx
       "jb avg_rlp                  \n\t" // mov does not affect flags
 
-      // get # of bytes to alignment
-      "movl %%edi, _dif            \n\t" // take start of row
-      "addl %%ebx, _dif            \n\t" // add bpp
-      "addl $0xf, _dif             \n\t" // add 7+8 to incr past alignment bdry
-      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
-      "subl %%edi, _dif            \n\t" // subtract from start => value ebx at
-      "jz avg_go                   \n\t" //  alignment
+      // get # of bytes to alignment (32-bit mask _would_ be good enough
+      // [computing delta], but 32-bit ops are zero-extended on 64-bit, argh)
+      // (if swapped edx and ebp, could do 8-bit or 16-bit mask...FIXME?)
+      "mov  %5, " PBP "            \n\t" // take start of row
+      "add  " PBX "," PBP "        \n\t" // add bpp
+      "add  $0xf, " PBP "          \n\t" // add 7+8 to incr past alignment bdry
+//    "andl $0xfffffff8, %%ebp     \n\t" // mask to alignment boundary (32-bit!)
+      CLEAR_BOTTOM_3_BITS  PBP    "\n\t" // mask to alignment boundary
+      "sub  %5, " PBP "            \n\t" // subtract row ptr again => ebp =
+      "jz avg_go                   \n\t" //  target value of ebx at alignment
+
+      "xorl %%ecx, %%ecx           \n\t"
 
       // fix alignment
       // Compute the Raw value for the bytes up to the alignment boundary
       //    Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
-      "xorl %%ecx, %%ecx           \n\t"
-
    "avg_lp1:                       \n\t"
       "xorl %%eax, %%eax           \n\t"
-      "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
-      "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
+      "movb (%4," PBX ",), %%cl    \n\t" // load cl with Prior(x)
+      "movb (" PDX "," PBX ",), %%al \n\t" // load al with Raw(x-bpp)
       "addw %%cx, %%ax             \n\t"
       "incl %%ebx                  \n\t"
       "shrw %%ax                   \n\t" // divide by 2
-      "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset inc ebx
-      "cmpl _dif, %%ebx            \n\t" // check if at alignment boundary
-      "movb %%al, -1(%%edi,%%ebx,) \n\t" // write Raw(x); -1 to offset inc ebx
+      "addb -1(%5," PBX ",), %%al  \n\t" // add Avg(x); -1 to offset inc ebx
+      "cmpl %%ebp, %%ebx           \n\t" // check if at alignment boundary
+      "movb %%al, -1(%5," PBX ",)  \n\t" // write Raw(x); -1 to offset inc ebx
       "jb avg_lp1                  \n\t" // repeat until at alignment boundary
 
    "avg_go:                        \n\t"
-      "movl _FullLength, %%eax     \n\t"
-      "movl %%eax, %%ecx           \n\t"
+      RESTORE_FullLength "%%eax    \n\t" // FullLength -> eax
+      "movl %%eax, %%ecx           \n\t" // copy -> ecx
       "subl %%ebx, %%eax           \n\t" // subtract alignment fix
       "andl $0x00000007, %%eax     \n\t" // calc bytes over mult of 8
-      "subl %%eax, %%ecx           \n\t" // drop over bytes from original length
-      "movl %%ecx, _MMXLength      \n\t"
-#ifdef __PIC__
-      "popl %%ebx                  \n\t" // restore index to Global Offset Table
-#endif
+      "subl %%eax, %%ecx           \n\t" // sub over-bytes from original length
+//out "movl %%ecx, MMXLength       \n\t"
+      "movl %%ebp, %%eax           \n\t" // ebp = diff, but no reg constraint(?)
+      RESTORE_ebp                        //  (could swap ebp and edx functions)
+      RESTORE_r15
+      RESTORE_GOT_ebx
 
-      : "=c" (dummy_value_c),            // output regs (dummy)
-        "=S" (dummy_value_S),
-        "=D" (dummy_value_D)
+// "There is no way for you to specify that an input operand is modified
+// without also specifying it as an output operand."  [makes sense]
 
-      : "0" (bpp),       // ecx          // input regs
-        "1" (prev_row),  // esi
-        "2" (row)        // edi
+// "Unless an output operand has the `&' constraint modifier, GCC may
+// allocate it in the same register as an unrelated input operand, on the
+// assumption the inputs are consumed before the outputs are produced."
+// [trying to _force_ this]
 
-      : "%eax", "%edx"                   // clobber list
-#ifndef __PIC__
-      , "%ebx"
-#endif
-      // GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength)
-      // (seems to work fine without...)
+// "`='   Means that this operand is write-only for this instruction:
+//        the previous value is discarded and replaced by output data."
+//        [operand == variable name, presumably]
+
+      // output regs
+      // these are operands 0-1 (originally 0-3):
+      : "=c" (MMXLength),      // %0 -> %0
+        "=a" (diff)            // %3 -> %1
+//      "=S" (dummy_value_S),  // %1 -> GONE
+//      "=D" (dummy_value_D),  // %2 -> GONE
+
+      // input regs
+      // these are operands 2-5 (originally 4-7); two of their constraints say
+      // they must go in same places as operands 0-1 (originally 0-3) above:
+      : "0" (bpp),         // %4 -> %2 ecx
+        "1" (FullLength),  // %7 -> %3 eax
+        "S" (prev_row),    // %5 -> %4 esi/rsi
+        "D" (row)          // %6 -> %5 edi/rdi
+
+      : "%edx"                           // clobber list
+        _CLOBBER_r15
+        _CLOBBER_ebp
+        _CLOBBER_GOT_ebx
    );
 
    // now do the math for the rest of the row
@@ -2875,152 +3361,149 @@
    {
       case 3:
       {
-         _ActiveMask.use  = 0x0000000000ffffffLL;
-         _ShiftBpp.use = 24;    // == 3 * 8
-         _ShiftRem.use = 40;    // == 64 - 24
+//       _ShiftBpp = 24;    // == 3 * 8
+//       _ShiftRem = 40;    // == 64 - 24
 
          __asm__ __volatile__ (
             // re-init address pointers and offset
-            "movq _ActiveMask, %%mm7      \n\t"
-            "movl _dif, %%ecx             \n\t" // ecx:  x = offset to
-            "movq _LBCarryMask, %%mm5     \n\t" //  alignment boundary
-// preload  "movl row, %%edi              \n\t" // edi:  Avg(x)
-            "movq _HBClearMask, %%mm4     \n\t"
-// preload  "movl prev_row, %%esi         \n\t" // esi:  Prior(x)
+            LOAD_GOT_rbp
+            "movq " AMASK5_3_0 ", %%mm7    \n\t" // _amask5_3_0 -> mm7
+// preload  "movl  diff, %%ecx             \n\t" // ecx:  x = offset to
+                                                 //  alignment boundary
+            "movq " LB_CARRY_MASK ", %%mm5 \n\t" // [interleave for parallel.?]
+// preload  "movl  row, %1                 \n\t" // edi:  Avg(x)
+            "movq " HB_CLEAR_MASK ", %%mm4 \n\t" // _HBClearMask -> mm4
+// preload  "movl  prev_row, %0            \n\t" // esi:  Prior(x)
+            RESTORE_rbp
 
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes
-                                                // (correct pos. in loop below)
+            "movq  -8(%1," PCX ",), %%mm2 \n\t"// load previous aligned 8 bytes
+                                               // (correct pos. in loop below)
          "avg_3lp:                        \n\t"
-            "movq (%%edi,%%ecx,), %%mm0   \n\t" // load mm0 with Avg(x)
-            "movq %%mm5, %%mm3            \n\t"
-            "psrlq _ShiftRem, %%mm2       \n\t" // correct position Raw(x-bpp)
+            "movq  (%1," PCX ",), %%mm0   \n\t" // load mm0 with Avg(x)
+            "movq  %%mm5, %%mm3           \n\t"
+            "psrlq $40, %%mm2             \n\t" // correct position Raw(x-bpp)
                                                 // data
-            "movq (%%esi,%%ecx,), %%mm1   \n\t" // load mm1 with Prior(x)
-            "movq %%mm7, %%mm6            \n\t"
-            "pand %%mm1, %%mm3            \n\t" // get lsb for each prev_row byte
+            "movq  (%0," PCX ",), %%mm1   \n\t" // load mm1 with Prior(x)
+            "movq  %%mm7, %%mm6           \n\t"
+            "pand  %%mm1, %%mm3           \n\t" // get lsb for each prevrow byte
             "psrlq $1, %%mm1              \n\t" // divide prev_row bytes by 2
             "pand  %%mm4, %%mm1           \n\t" // clear invalid bit 7 of each
                                                 // byte
             "paddb %%mm1, %%mm0           \n\t" // add (Prev_row/2) to Avg for
                                                 // each byte
             // add 1st active group (Raw(x-bpp)/2) to average with LBCarry
-            "movq %%mm3, %%mm1            \n\t" // now use mm1 for getting
+            "movq  %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                 // LBCarrys
-            "pand %%mm2, %%mm1            \n\t" // get LBCarrys for each byte
-                                                // where both
-                               // lsb's were == 1 (only valid for active group)
+            "pand  %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
+                                                // where both lsb's were == 1
+                                                // (valid only for active group)
             "psrlq $1, %%mm2              \n\t" // divide raw bytes by 2
             "pand  %%mm4, %%mm2           \n\t" // clear invalid bit 7 of each
                                                 // byte
-            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to (Raw(x-bpp)/2)
+            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to Raw(x-bpp)/2
                                                 // for each byte
-            "pand %%mm6, %%mm2            \n\t" // leave only Active Group 1
+            "pand  %%mm6, %%mm2           \n\t" // leave only Active Group 1
                                                 // bytes to add to Avg
             "paddb %%mm2, %%mm0           \n\t" // add (Raw/2) + LBCarrys to
-                                                // Avg for each Active
-                               //  byte
+                                                // Avg for each Active byte
             // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry
-            "psllq _ShiftBpp, %%mm6       \n\t" // shift the mm6 mask to cover
+            "psllq $24, %%mm6             \n\t" // shift the mm6 mask to cover
                                                 // bytes 3-5
-            "movq %%mm0, %%mm2            \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2       \n\t" // shift data to pos. correctly
-            "movq %%mm3, %%mm1            \n\t" // now use mm1 for getting
+            "movq  %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $24, %%mm2             \n\t" // shift data to pos. correctly
+            "movq  %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                 // LBCarrys
-            "pand %%mm2, %%mm1            \n\t" // get LBCarrys for each byte
-                                                // where both
-                               // lsb's were == 1 (only valid for active group)
+            "pand  %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
+                                                // where both lsb's were == 1
+                                                // (valid only for active group)
             "psrlq $1, %%mm2              \n\t" // divide raw bytes by 2
             "pand  %%mm4, %%mm2           \n\t" // clear invalid bit 7 of each
                                                 // byte
-            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to (Raw(x-bpp)/2)
+            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to Raw(x-bpp)/2
                                                 // for each byte
-            "pand %%mm6, %%mm2            \n\t" // leave only Active Group 2
+            "pand  %%mm6, %%mm2           \n\t" // leave only Active Group 2
                                                 // bytes to add to Avg
             "paddb %%mm2, %%mm0           \n\t" // add (Raw/2) + LBCarrys to
-                                                // Avg for each Active
-                               //  byte
+                                                // Avg for each Active byte
 
             // add 3rd active group (Raw(x-bpp)/2) to average with _LBCarry
-            "psllq _ShiftBpp, %%mm6       \n\t" // shift mm6 mask to cover last
-                                                // two
-                                 // bytes
-            "movq %%mm0, %%mm2            \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2       \n\t" // shift data to pos. correctly
-                              // Data only needs to be shifted once here to
+            "psllq $24, %%mm6             \n\t" // shift mm6 mask to cover last
+                                                // two bytes
+            "movq  %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $24, %%mm2             \n\t" // shift data to pos. correctly
+                              // Data need be shifted only once here to
                               // get the correct x-bpp offset.
-            "movq %%mm3, %%mm1            \n\t" // now use mm1 for getting
+            "movq  %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                 // LBCarrys
-            "pand %%mm2, %%mm1            \n\t" // get LBCarrys for each byte
+            "pand  %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
                                                 // where both
                               // lsb's were == 1 (only valid for active group)
             "psrlq $1, %%mm2              \n\t" // divide raw bytes by 2
             "pand  %%mm4, %%mm2           \n\t" // clear invalid bit 7 of each
                                                 // byte
-            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to (Raw(x-bpp)/2)
+            "paddb %%mm1, %%mm2           \n\t" // add LBCarrys to Raw(x-bpp)/2
                                                 // for each byte
-            "pand %%mm6, %%mm2            \n\t" // leave only Active Group 2
+            "pand  %%mm6, %%mm2           \n\t" // leave only Active Group 3
                                                 // bytes to add to Avg
-            "addl $8, %%ecx               \n\t"
+            "addl  $8, %%ecx              \n\t"
             "paddb %%mm2, %%mm0           \n\t" // add (Raw/2) + LBCarrys to
-                                                // Avg for each Active
-                                                // byte
+                                                // Avg for each Active byte
             // now ready to write back to memory
-            "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
+            "movq  %%mm0, -8(%1," PCX ",) \n\t"
             // move updated Raw(x) to use as Raw(x-bpp) for next loop
-            "cmpl _MMXLength, %%ecx       \n\t"
-            "movq %%mm0, %%mm2            \n\t" // mov updated Raw(x) to mm2
+            "cmpl  %%eax, %%ecx           \n\t" // MMXLength
+            "movq  %%mm0, %%mm2           \n\t" // mov updated Raw(x) to mm2
             "jb avg_3lp                   \n\t"
 
-            : "=S" (dummy_value_S),             // output regs (dummy)
-              "=D" (dummy_value_D)
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi           // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (MMXLength)    // eax
 
-            : "%ecx"                            // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
       break;  // end 3 bpp
 
-      case 6:
-      case 4:
-      //case 7:   // who wrote this?  PNG doesn't support 5 or 7 bytes/pixel
-      //case 5:   // GRR BOGUS
-      {
-         _ActiveMask.use  = 0xffffffffffffffffLL; // use shift below to clear
-                                                  // appropriate inactive bytes
-         _ShiftBpp.use = bpp << 3;
-         _ShiftRem.use = 64 - _ShiftBpp.use;
+      case 4:   // formerly shared with 6 bpp case via _ShiftBpp and _ShiftRem,
+      {         // but loop uses all 8 MMX regs, and psrlq/psllq require 64-bit
+                // mem (PIC/.so problems), MMX reg (none left), or immediate
+//       _ShiftBpp = bpp << 3;        // 32 (psllq)
+//       _ShiftRem = 64 - _ShiftBpp;  // 32 (psrlq)
 
          __asm__ __volatile__ (
-            "movq _HBClearMask, %%mm4    \n\t"
-
+            LOAD_GOT_rbp
+            "movq " HB_CLEAR_MASK ", %%mm4 \n\t" // _HBClearMask -> mm4
+            "movq " LB_CARRY_MASK ", %%mm5 \n\t" // _LBCarryMask -> mm5
             // re-init address pointers and offset
-            "movl _dif, %%ecx            \n\t" // ecx:  x = offset to
-                                               // alignment boundary
+// preload  "movl  diff, %%ecx             \n\t" // ecx:  x = offset to
+                                                 // alignment boundary
+            "movq " AMASK0_8_0 ", %%mm7    \n\t" // _amask0_8_0 -> mm7
+            RESTORE_rbp
 
-            // load _ActiveMask and clear all bytes except for 1st active group
-            "movq _ActiveMask, %%mm7     \n\t"
-// preload  "movl row, %%edi             \n\t" // edi:  Avg(x)
-            "psrlq _ShiftRem, %%mm7      \n\t"
-// preload  "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
-            "movq %%mm7, %%mm6           \n\t"
-            "movq _LBCarryMask, %%mm5    \n\t"
-            "psllq _ShiftBpp, %%mm6      \n\t" // create mask for 2nd active
-                                               // group
+            // ... and clear all bytes except for 1st active group
+// preload  "movl  row, %1               \n\t" // edi:  Avg(x)
+            "psrlq $32, %%mm7            \n\t" // was _ShiftRem
+// preload  "movl  prev_row, %0          \n\t" // esi:  Prior(x)
+            "movq  %%mm7, %%mm6          \n\t"
+            "psllq $32, %%mm6            \n\t" // mask for 2nd active group
 
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes
-                                          // (we correct pos. in loop below)
+            "movq -8(%1," PCX ",), %%mm2 \n\t" // load previous aligned 8 bytes
+                                             // (we correct pos. in loop below)
          "avg_4lp:                       \n\t"
-            "movq (%%edi,%%ecx,), %%mm0  \n\t"
-            "psrlq _ShiftRem, %%mm2      \n\t" // shift data to pos. correctly
-            "movq (%%esi,%%ecx,), %%mm1  \n\t"
+            "movq (%1," PCX ",), %%mm0   \n\t"
+            "psrlq $32, %%mm2            \n\t" // shift data to pos. correctly
+            "movq (%0," PCX ",), %%mm1   \n\t"
             // add (Prev_row/2) to average
             "movq %%mm5, %%mm3           \n\t"
             "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
@@ -3047,7 +3530,7 @@
                               // byte
             // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2      \n\t" // shift data to pos. correctly
+            "psllq $32, %%mm2            \n\t" // shift data to pos. correctly
             "addl $8, %%ecx              \n\t"
             "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                // LBCarrys
@@ -3062,54 +3545,114 @@
             "pand %%mm6, %%mm2           \n\t" // leave only Active Group 2
                                                // bytes to add to Avg
             "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to
-                                               // Avg for each Active
-                              // byte
-            "cmpl _MMXLength, %%ecx      \n\t"
+                                               // Avg for each Active byte
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
             // now ready to write back to memory
-            "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
+            "movq %%mm0, -8(%1," PCX ",) \n\t"
             // prep Raw(x-bpp) for next loop
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
             "jb avg_4lp                  \n\t"
 
             : "=S" (dummy_value_S),            // output regs (dummy)
-              "=D" (dummy_value_D)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi          // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (MMXLength)    // eax
 
-            : "%ecx"                           // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
-      break;  // end 4,6 bpp
+      break;  // end 4 bpp
+
+      case 1:
+      {
+         __asm__ __volatile__ (
+            // re-init address pointers and offset
+// preload  "movl diff, %%ecx            \n\t" // ecx: x = offset to align. bdry
+// preload  "movl row, %1                \n\t" // edi/rdi:  Avg(x)
+// preload  "movl FullLength, %%eax      \n\t"
+            "cmpl %%eax, %%ecx           \n\t" // test if offset at end of array
+            "jnb avg_1end                \n\t"
+
+            SAVE_ebp
+
+            // do Avg decode for remaining bytes
+// preload  "movl prev_row, %0           \n\t" // esi/rsi:  Prior(x)
+            "mov  %1, " PBP "            \n\t" // copy of row pointer...
+            "dec  " PBP "                \n\t" // ebp/rbp:  Raw(x-bpp)
+            "xorl %%edx, %%edx           \n\t" // zero edx before using dl & dx
+                                               //  in loop below
+            SAVE_GOT_ebx
+
+         "avg_1lp:                       \n\t"
+            // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
+            "xorl %%ebx, %%ebx           \n\t"
+            "movb (%0," PCX ",), %%dl    \n\t" // load dl with Prior(x)
+            "movb (" PBP "," PCX ",), %%bl \n\t" // load bl with Raw(x-bpp)
+            "addw %%dx, %%bx             \n\t"
+            "incl %%ecx                  \n\t"
+            "shrw %%bx                   \n\t" // divide by 2
+            "addb -1(%1," PCX ",), %%bl  \n\t" // add Avg(x); -1 to offset
+                                               // inc ecx
+            "cmpl %%eax, %%ecx           \n\t" // check if at end of array
+            "movb %%bl, -1(%1," PCX ",)  \n\t" // write back Raw(x);
+                         // mov does not affect flags; -1 to offset inc ecx
+            "jb avg_1lp                  \n\t"
+
+            RESTORE_GOT_ebx
+            RESTORE_ebp
+
+         "avg_1end:                      \n\t"
+
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
+
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (FullLength)   // eax
+
+            : "%edx"                           // clobber list
+              _CLOBBER_GOT_ebx
+              _CLOBBER_ebp
+         );
+      }
+      return;  // end 1 bpp
 
       case 2:
       {
-         _ActiveMask.use  = 0x000000000000ffffLL;
-         _ShiftBpp.use = 16;   // == 2 * 8
-         _ShiftRem.use = 48;   // == 64 - 16
+//       _ShiftBpp = 16;   // == 2 * 8
+//       _ShiftRem = 48;   // == 64 - _ShiftBpp
 
          __asm__ __volatile__ (
-            // load _ActiveMask
-            "movq _ActiveMask, %%mm7     \n\t"
+            LOAD_GOT_rbp
+            // load (former) _ActiveMask
+            "movq " AMASK6_2_0 ", %%mm7    \n\t" // _amask6_2_0 -> mm7
             // re-init address pointers and offset
-            "movl _dif, %%ecx            \n\t" // ecx:  x = offset to alignment
-                                               // boundary
-            "movq _LBCarryMask, %%mm5    \n\t"
-// preload  "movl row, %%edi             \n\t" // edi:  Avg(x)
-            "movq _HBClearMask, %%mm4    \n\t"
-// preload  "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
+// preload  "movl  diff, %%ecx             \n\t" // ecx:  x = offset to
+                                                 // alignment boundary
+            "movq " LB_CARRY_MASK ", %%mm5 \n\t" // _LBCarryMask -> mm5
+// preload  "movl  row, %1                 \n\t" // edi:  Avg(x)
+            "movq " HB_CLEAR_MASK ", %%mm4 \n\t" // _HBClearMask -> mm4
+// preload  "movl  prev_row, %0            \n\t" // esi:  Prior(x)
+            RESTORE_rbp
 
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes
-                              // (we correct pos. in loop below)
+            "movq -8(%1," PCX ",), %%mm2 \n\t" // load previous aligned 8 bytes
+                                             // (we correct pos. in loop below)
          "avg_2lp:                       \n\t"
-            "movq (%%edi,%%ecx,), %%mm0  \n\t"
-            "psrlq _ShiftRem, %%mm2      \n\t" // shift data to pos. correctly
-            "movq (%%esi,%%ecx,), %%mm1  \n\t" //  (GRR BUGFIX:  was psllq)
+            "movq (%1," PCX ",), %%mm0   \n\t"
+            "psrlq $48, %%mm2            \n\t" // shift data to pos. correctly
+            "movq (%0," PCX ",), %%mm1   \n\t" //  (GRR BUGFIX: psrlq was psllq)
             // add (Prev_row/2) to average
             "movq %%mm5, %%mm3           \n\t"
             "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
@@ -3138,10 +3681,10 @@
                                                // for each Active byte
 
             // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry
-            "psllq _ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover
+            "psllq $16, %%mm6            \n\t" // shift the mm6 mask to cover
                                                // bytes 2 & 3
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2      \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm2            \n\t" // shift data to pos. correctly
             "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                // LBCarrys
             "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
@@ -3159,10 +3702,10 @@
                                                // Avg for each Active byte
 
             // add 3rd active group (Raw(x-bpp)/2) to average with _LBCarry
-            "psllq _ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover
+            "psllq $16, %%mm6            \n\t" // shift the mm6 mask to cover
                                                // bytes 4 & 5
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2      \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm2            \n\t" // shift data to pos. correctly
             "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                // LBCarrys
             "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
@@ -3179,10 +3722,10 @@
                                                // Avg for each Active byte
 
             // add 4th active group (Raw(x-bpp)/2) to average with _LBCarry
-            "psllq _ShiftBpp, %%mm6      \n\t" // shift the mm6 mask to cover
+            "psllq $16, %%mm6            \n\t" // shift the mm6 mask to cover
                                                // bytes 6 & 7
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
-            "psllq _ShiftBpp, %%mm2      \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm2            \n\t" // shift data to pos. correctly
             "addl $8, %%ecx              \n\t"
             "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
                                                // LBCarrys
@@ -3199,102 +3742,148 @@
                                                // bytes to add to Avg
             "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to
                                                // Avg for each Active byte
-
-            "cmpl _MMXLength, %%ecx      \n\t"
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
             // now ready to write back to memory
-            "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
+            "movq %%mm0, -8(%1," PCX ",) \n\t"
             // prep Raw(x-bpp) for next loop
             "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
             "jb avg_2lp                  \n\t"
 
             : "=S" (dummy_value_S),            // output regs (dummy)
-              "=D" (dummy_value_D)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi          // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (MMXLength)    // eax
 
-            : "%ecx"                           // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
       break;  // end 2 bpp
 
-      case 1:
+      case 6:   // formerly shared with 4 bpp case (see comments there)
       {
+//       _ShiftBpp = bpp << 3;        // 48 (psllq)
+//       _ShiftRem = 64 - _ShiftBpp;  // 16 (psrlq)
+
          __asm__ __volatile__ (
+            LOAD_GOT_rbp
+            "movq " HB_CLEAR_MASK ", %%mm4 \n\t" // _HBClearMask -> mm4
+            "movq " LB_CARRY_MASK ", %%mm5 \n\t" // _LBCarryMask -> mm5
             // re-init address pointers and offset
-#ifdef __PIC__
-            "pushl %%ebx                 \n\t" // save Global Offset Table index
-#endif
-            "movl _dif, %%ebx            \n\t" // ebx:  x = offset to alignment
-                                               // boundary
-// preload  "movl row, %%edi             \n\t" // edi:  Avg(x)
-            "cmpl _FullLength, %%ebx     \n\t" // test if offset at end of array
-            "jnb avg_1end                \n\t"
-            // do Paeth decode for remaining bytes
-// preload  "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
-            "movl %%edi, %%edx           \n\t"
-// preload  "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
-            "subl %%ecx, %%edx           \n\t" // edx:  Raw(x-bpp)
-            "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx
-                                               //  in loop below
-         "avg_1lp:                       \n\t"
-            // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
-            "xorl %%eax, %%eax           \n\t"
-            "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
-            "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
-            "addw %%cx, %%ax             \n\t"
-            "incl %%ebx                  \n\t"
-            "shrw %%ax                   \n\t" // divide by 2
-            "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset
-                                               // inc ebx
-            "cmpl _FullLength, %%ebx     \n\t" // check if at end of array
-            "movb %%al, -1(%%edi,%%ebx,) \n\t" // write back Raw(x);
-                         // mov does not affect flags; -1 to offset inc ebx
-            "jb avg_1lp                  \n\t"
+// preload  "movl  diff, %%ecx             \n\t" // ecx:  x = offset to
+                                                 // alignment boundary
+            "movq " AMASK0_8_0 ", %%mm7    \n\t" // _amask0_8_0 -> mm7
+            RESTORE_rbp
 
-         "avg_1end:                      \n\t"
-#ifdef __PIC__
-            "popl %%ebx                  \n\t" // Global Offset Table index
-#endif
+            // ... and clear all bytes except for 1st active group
+// preload  "movl  row, %1               \n\t" // edi:  Avg(x)
+            "psrlq $16, %%mm7            \n\t"
+// preload  "movl  prev_row, %0          \n\t" // esi:  Prior(x)
+            "movq  %%mm7, %%mm6          \n\t"
+            "psllq $48, %%mm6            \n\t" // mask for 2nd active group
 
-            : "=c" (dummy_value_c),            // output regs (dummy)
-              "=S" (dummy_value_S),
-              "=D" (dummy_value_D)
+            // prime the pump:  load the first Raw(x-bpp) data set
+            "movq -8(%1," PCX ",), %%mm2 \n\t" // load previous aligned 8 bytes
+                                             // (we correct pos. in loop below)
+         "avg_6lp:                       \n\t"
+            "movq (%1," PCX ",), %%mm0   \n\t"
+            "psrlq $16, %%mm2            \n\t" // shift data to pos. correctly
+            "movq (%0," PCX ",), %%mm1   \n\t"
+            // add (Prev_row/2) to average
+            "movq %%mm5, %%mm3           \n\t"
+            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
+            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
+            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each
+                                               // byte
+            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for
+                                               // each byte
+            // add 1st active group (Raw(x-bpp)/2) to average with _LBCarry
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
+                                               // LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
+                                               // where both
+                              // lsb's were == 1 (only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each
+                                               // byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2)
+                                               // for each byte
+            "pand %%mm7, %%mm2           \n\t" // leave only Active Group 1
+                                               // bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to Avg
+                                               // for each Active
+                              // byte
+            // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "psllq $48, %%mm2            \n\t" // shift data to pos. correctly
+            "addl $8, %%ecx              \n\t"
+            "movq %%mm3, %%mm1           \n\t" // now use mm1 for getting
+                                               // LBCarrys
+            "pand %%mm2, %%mm1           \n\t" // get LBCarrys for each byte
+                                               // where both
+                              // lsb's were == 1 (only valid for active group)
+            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
+            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each
+                                               // byte
+            "paddb %%mm1, %%mm2          \n\t" // add LBCarrys to (Raw(x-bpp)/2)
+                                               // for each byte
+            "pand %%mm6, %%mm2           \n\t" // leave only Active Group 2
+                                               // bytes to add to Avg
+            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) + LBCarrys to
+                                               // Avg for each Active byte
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
+            // now ready to write back to memory
+            "movq %%mm0, -8(%1," PCX ",) \n\t"
+            // prep Raw(x-bpp) for next loop
+            "movq %%mm0, %%mm2           \n\t" // mov updated Raws to mm2
+            "jb avg_6lp                  \n\t"
 
-            : "0" (bpp),       // ecx          // input regs
-              "1" (prev_row),  // esi
-              "2" (row)        // edi
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "%eax", "%edx"                   // clobber list
-#ifndef __PIC__
-            , "%ebx"
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (MMXLength)    // eax
+
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
+            , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
-      return;  // end 1 bpp
+      break;  // end 6 bpp
 
       case 8:
       {
          __asm__ __volatile__ (
             // re-init address pointers and offset
-            "movl _dif, %%ecx            \n\t" // ecx:  x == offset to alignment
-            "movq _LBCarryMask, %%mm5    \n\t" //            boundary
-// preload  "movl row, %%edi             \n\t" // edi:  Avg(x)
-            "movq _HBClearMask, %%mm4    \n\t"
-// preload  "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
+// preload  "movl  diff, %%ecx             \n\t" // ecx:  x = offset to
+                                                 // alignment boundary
+            LOAD_GOT_rbp
+            "movq " LB_CARRY_MASK ", %%mm5 \n\t" // [interleave for parallel.?]
+// preload  "movl  row, %1                 \n\t" // edi:  Avg(x)
+            "movq " HB_CLEAR_MASK ", %%mm4 \n\t" // _HBClearMask -> mm4
+// preload  "movl  prev_row, %0            \n\t" // esi:  Prior(x)
+            RESTORE_rbp
 
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes
+            "movq -8(%1," PCX ",), %%mm2 \n\t" // load previous aligned 8 bytes
                                       // (NO NEED to correct pos. in loop below)
 
          "avg_8lp:                       \n\t"
-            "movq (%%edi,%%ecx,), %%mm0  \n\t"
+            "movq (%1," PCX ",), %%mm0   \n\t"
             "movq %%mm5, %%mm3           \n\t"
-            "movq (%%esi,%%ecx,), %%mm1  \n\t"
+            "movq (%0," PCX ",), %%mm1   \n\t"
             "addl $8, %%ecx              \n\t"
             "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
             "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
@@ -3306,78 +3895,36 @@
             "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7, each byte
             "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg, each
             "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) to Avg for each
-            "cmpl _MMXLength, %%ecx      \n\t"
-            "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
+            "movq %%mm0, -8(%1," PCX ",) \n\t"
             "movq %%mm0, %%mm2           \n\t" // reuse as Raw(x-bpp)
             "jb avg_8lp                  \n\t"
 
             : "=S" (dummy_value_S),            // output regs (dummy)
-              "=D" (dummy_value_D)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi          // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),    // esi/rsi    // input regs
+              "1" (row),         // edi/rdi
+              "2" (diff),        // ecx
+              "3" (MMXLength)    // eax
 
-            : "%ecx"                           // clobber list
-#if 0  /* %mm0, ..., %mm5 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2"           // clobber list
             , "%mm3", "%mm4", "%mm5"
 #endif
          );
       }
       break;  // end 8 bpp
 
-      default:                  // bpp greater than 8 (!= 1,2,3,4,[5],6,[7],8)
+      default:                // bpp != 1,2,3,4,6,8:  doesn't exist
       {
-
-#ifdef PNG_DEBUG
-         // GRR:  PRINT ERROR HERE:  SHOULD NEVER BE REACHED
-        png_debug(1,
-        "Internal logic error in pnggccrd (png_read_filter_row_mmx_avg())\n");
+         // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+         png_debug(1, "Internal libpng logic error (GCC "
+           "png_read_filter_row_mmx_avg())\n");
 #endif
-
-#if 0
-        __asm__ __volatile__ (
-            "movq _LBCarryMask, %%mm5    \n\t"
-            // re-init address pointers and offset
-            "movl _dif, %%ebx            \n\t" // ebx:  x = offset to
-                                               // alignment boundary
-            "movl row, %%edi             \n\t" // edi:  Avg(x)
-            "movq _HBClearMask, %%mm4    \n\t"
-            "movl %%edi, %%edx           \n\t"
-            "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
-            "subl bpp, %%edx             \n\t" // edx:  Raw(x-bpp)
-         "avg_Alp:                       \n\t"
-            "movq (%%edi,%%ebx,), %%mm0  \n\t"
-            "movq %%mm5, %%mm3           \n\t"
-            "movq (%%esi,%%ebx,), %%mm1  \n\t"
-            "pand %%mm1, %%mm3           \n\t" // get lsb for each prev_row byte
-            "movq (%%edx,%%ebx,), %%mm2  \n\t"
-            "psrlq $1, %%mm1             \n\t" // divide prev_row bytes by 2
-            "pand %%mm2, %%mm3           \n\t" // get LBCarrys for each byte
-                                               // where both lsb's were == 1
-            "psrlq $1, %%mm2             \n\t" // divide raw bytes by 2
-            "pand  %%mm4, %%mm1          \n\t" // clear invalid bit 7 of each
-                                               // byte
-            "paddb %%mm3, %%mm0          \n\t" // add LBCarrys to Avg for each
-                                               // byte
-            "pand  %%mm4, %%mm2          \n\t" // clear invalid bit 7 of each
-                                               // byte
-            "paddb %%mm1, %%mm0          \n\t" // add (Prev_row/2) to Avg for
-                                               // each byte
-            "addl $8, %%ebx              \n\t"
-            "paddb %%mm2, %%mm0          \n\t" // add (Raw/2) to Avg for each
-                                               // byte
-            "cmpl _MMXLength, %%ebx      \n\t"
-            "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
-            "jb avg_Alp                  \n\t"
-
-            : // FIXASM: output regs/vars go here, e.g.:  "=m" (memory_var)
-
-            : // FIXASM: input regs, e.g.:  "c" (count), "S" (src), "D" (dest)
-
-            : "%ebx", "%edx", "%edi", "%esi" // CHECKASM: clobber list
-         );
-#endif /* 0 - NEVER REACHED */
       }
       break;
 
@@ -3386,60 +3933,69 @@
    __asm__ __volatile__ (
       // MMX acceleration complete; now do clean-up
       // check if any remaining bytes left to decode
-#ifdef __PIC__
-      "pushl %%ebx                 \n\t" // save index to Global Offset Table
-#endif
-      "movl _MMXLength, %%ebx      \n\t" // ebx:  x == offset bytes after MMX
-//pre "movl row, %%edi             \n\t" // edi:  Avg(x)
-      "cmpl _FullLength, %%ebx     \n\t" // test if offset at end of array
+//pre "movl FullLength, %%edx      \n\t"
+//pre "movl MMXLength, %%eax       \n\t" // eax:  x == offset bytes after MMX
+//pre "movl row, %2                \n\t" // edi:  Avg(x)
+      "cmpl %%edx, %%eax           \n\t" // test if offset at end of array
       "jnb avg_end                 \n\t"
 
+      SAVE_ebp
+
       // do Avg decode for remaining bytes
-//pre "movl prev_row, %%esi        \n\t" // esi:  Prior(x)
-      "movl %%edi, %%edx           \n\t"
-//pre "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
-      "subl %%ecx, %%edx           \n\t" // edx:  Raw(x-bpp)
+//pre "movl prev_row, %1           \n\t" // esi:  Prior(x)
+      "mov  %2, " PBP "            \n\t" // copy of row pointer...
+//pre "subl bpp, " PBP "           \n\t" // (bpp is preloaded into ecx)
+      "sub  " PCX "," PBP "        \n\t" // ebp:  Raw(x-bpp)
       "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx below
 
+      SAVE_GOT_ebx
+
    "avg_lp2:                       \n\t"
       // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2)
-      "xorl %%eax, %%eax           \n\t"
-      "movb (%%esi,%%ebx,), %%cl   \n\t" // load cl with Prior(x)
-      "movb (%%edx,%%ebx,), %%al   \n\t" // load al with Raw(x-bpp)
-      "addw %%cx, %%ax             \n\t"
-      "incl %%ebx                  \n\t"
-      "shrw %%ax                   \n\t" // divide by 2
-      "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset inc ebx
-      "cmpl _FullLength, %%ebx     \n\t" // check if at end of array
-      "movb %%al, -1(%%edi,%%ebx,) \n\t" // write back Raw(x) [mov does not
-      "jb avg_lp2                  \n\t" //  affect flags; -1 to offset inc ebx]
+      "xorl %%ebx, %%ebx           \n\t"
+      "movb (%1," PAX ",), %%cl    \n\t" // load cl with Prior(x)
+      "movb (" PBP "," PAX ",), %%bl \n\t" // load bl with Raw(x-bpp)
+      "addw %%cx, %%bx             \n\t"
+      "incl %%eax                  \n\t"
+      "shrw %%bx                   \n\t" // divide by 2
+      "addb -1(%2," PAX ",), %%bl  \n\t" // add Avg(x); -1 to offset inc eax
+      "cmpl %%edx, %%eax           \n\t" // check if at end of array
+      "movb %%bl, -1(%2," PAX ",)  \n\t" // write back Raw(x) [mov does not
+      "jb avg_lp2                  \n\t" //  affect flags; -1 to offset inc eax]
+
+      RESTORE_GOT_ebx
+      RESTORE_ebp
 
    "avg_end:                       \n\t"
       "EMMS                        \n\t" // end MMX; prep for poss. FP instrs.
-#ifdef __PIC__
-      "popl %%ebx                  \n\t" // restore index to Global Offset Table
-#endif
 
       : "=c" (dummy_value_c),            // output regs (dummy)
         "=S" (dummy_value_S),
-        "=D" (dummy_value_D)
+        "=D" (dummy_value_D),
+        "=a" (dummy_value_a),
+        "=d" (dummy_value_d)
 
-      : "0" (bpp),       // ecx          // input regs
-        "1" (prev_row),  // esi
-        "2" (row)        // edi
+      : "0" (bpp),         // ecx        // input regs
+        "1" (prev_row),    // esi/rsi
+        "2" (row),         // edi/rdi
+        "3" (MMXLength),   // eax
+        "4" (FullLength)   // edx
 
-      : "%eax", "%edx"                   // clobber list
-#ifndef __PIC__
-      , "%ebx"
-#endif
+      CLOB_COLON_ebx_ebp                 // clobber list
+        CLOBBER_GOT_ebx
+        CLOB_COMMA_ebx_ebp
+        CLOBBER_ebp
    );
 
 } /* end png_read_filter_row_mmx_avg() */
-#endif
+
+#endif /* PNG_MMX_READ_FILTER_AVG_SUPPORTED */
 
 
 
-#ifdef PNG_THREAD_UNSAFE_OK
+#if defined(PNG_MMX_READ_FILTER_PAETH_SUPPORTED)
+#if defined(PNG_x86_64_USE_GOTPCREL) || defined(PNG_THREAD_UNSAFE_OK)
+
 //===========================================================================//
 //                                                                           //
 //         P N G _ R E A D _ F I L T E R _ R O W _ M M X _ P A E T H         //
@@ -3452,141 +4008,157 @@
 png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,
                               png_bytep prev_row)
 {
+   unsigned FullLength, MMXLength;  // png_uint_32 is actually 64-bit on x86-64
    int bpp;
+   int dummy_value_a;
    int dummy_value_c;   // fix 'forbidden register 2 (cx) was spilled' error
-   int dummy_value_S;
-   int dummy_value_D;
+   int dummy_value_d;
+   png_charp dummy_value_S;
+   png_charp dummy_value_D;
+   int diff; //     __attribute__((used));
 
-   bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel
-   _FullLength  = row_info->rowbytes; // # of bytes to filter
+   bpp = (row_info->pixel_depth + 7) >> 3;  // calc number of bytes per pixel
+   FullLength = row_info->rowbytes;         // number of bytes to filter
 
    __asm__ __volatile__ (
-#ifdef __PIC__
-      "pushl %%ebx                 \n\t" // save index to Global Offset Table
-#endif
+      SAVE_GOT_ebx
+      SAVE_r15
+      SAVE_ebp
+//pre "movl row, %2                \n\t" // edi/rdi
       "xorl %%ebx, %%ebx           \n\t" // ebx:  x offset
-//pre "movl row, %%edi             \n\t"
+//pre "movl prev_row, %1           \n\t" // esi/rsi
       "xorl %%edx, %%edx           \n\t" // edx:  x-bpp offset
-//pre "movl prev_row, %%esi        \n\t"
+//pre "movl FullLength, %%eax      \n\t" // bring in via eax...
+      SAVE_FullLength                    // ...but store for later use
       "xorl %%eax, %%eax           \n\t"
 
       // Compute the Raw value for the first bpp bytes
       // Note: the formula works out to be always
       //   Paeth(x) = Raw(x) + Prior(x)      where x < bpp
    "paeth_rlp:                     \n\t"
-      "movb (%%edi,%%ebx,), %%al   \n\t"
-      "addb (%%esi,%%ebx,), %%al   \n\t"
+      "movb (%2," PBX ",), %%al    \n\t"
+      "addb (%1," PBX ",), %%al    \n\t"
       "incl %%ebx                  \n\t"
 //pre "cmpl bpp, %%ebx             \n\t" (bpp is preloaded into ecx)
       "cmpl %%ecx, %%ebx           \n\t"
-      "movb %%al, -1(%%edi,%%ebx,) \n\t"
+      "movb %%al, -1(%2," PBX ",)  \n\t"
       "jb paeth_rlp                \n\t"
-      // get # of bytes to alignment
-      "movl %%edi, _dif            \n\t" // take start of row
-      "addl %%ebx, _dif            \n\t" // add bpp
-      "xorl %%ecx, %%ecx           \n\t"
-      "addl $0xf, _dif             \n\t" // add 7 + 8 to incr past alignment
-                                         // boundary
-      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
-      "subl %%edi, _dif            \n\t" // subtract from start ==> value ebx
-                                         // at alignment
-      "jz paeth_go                 \n\t"
-      // fix alignment
 
+      // get # of bytes to alignment (note:  computing _delta_ of two pointers,
+      // so hereafter %%ebp is sufficient even on 64-bit)
+      "mov  %2, " PBP "            \n\t" // take start of row
+      "add  " PBX "," PBP "        \n\t" // add bpp
+      "add  $0xf, " PBP "          \n\t" // add 7+8 to incr past alignment bdry
+//    "andl $0xfffffff8, %%ebp     \n\t" // mask to alignment boundary (32-bit!)
+      CLEAR_BOTTOM_3_BITS  PBP    "\n\t" // mask to alignment boundary
+      "sub  %2, " PBP "            \n\t" // subtract row ptr again => ebp =
+      "jz paeth_go                 \n\t" //  target value of ebx at alignment
+
+      "xorl %%ecx, %%ecx           \n\t"
+
+      SAVE_r11_r12_r13
+
+      // fix alignment
    "paeth_lp1:                     \n\t"
       "xorl %%eax, %%eax           \n\t"
       // pav = p - a = (a + b - c) - a = b - c
-      "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "movb (%1," PBX ",), %%al    \n\t" // load Prior(x) into al
+      "movb (%1," PDX ",), %%cl    \n\t" // load Prior(x-bpp) into cl
       "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
-      "movl %%eax, _patemp         \n\t" // Save pav for later use
+      "movl %%eax, " pa_TEMP "     \n\t" // Save pav for later use
       "xorl %%eax, %%eax           \n\t"
       // pbv = p - b = (a + b - c) - b = a - c
-      "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
+      "movb (%2," PDX ",), %%al    \n\t" // load Raw(x-bpp) into al
       "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
       "movl %%eax, %%ecx           \n\t"
-      // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
-      "addl _patemp, %%eax         \n\t" // pcv = pav + pbv
+      // pcv = p - c = (a + b - c) - c = (a - c) + (b - c) = pav + pbv
+      "addl " pa_TEMP ", %%eax     \n\t" // pcv = pav + pbv
       // pc = abs(pcv)
       "testl $0x80000000, %%eax    \n\t"
       "jz paeth_pca                \n\t"
       "negl %%eax                  \n\t" // reverse sign of neg values
 
    "paeth_pca:                     \n\t"
-      "movl %%eax, _pctemp         \n\t" // save pc for later use
+      "movl %%eax, " pc_TEMP "     \n\t" // save pc for later use
       // pb = abs(pbv)
       "testl $0x80000000, %%ecx    \n\t"
       "jz paeth_pba                \n\t"
       "negl %%ecx                  \n\t" // reverse sign of neg values
 
    "paeth_pba:                     \n\t"
-      "movl %%ecx, _pbtemp         \n\t" // save pb for later use
+      "movl %%ecx, " pb_TEMP "     \n\t" // save pb for later use
       // pa = abs(pav)
-      "movl _patemp, %%eax         \n\t"
+      "movl " pa_TEMP ", %%eax     \n\t"
       "testl $0x80000000, %%eax    \n\t"
       "jz paeth_paa                \n\t"
       "negl %%eax                  \n\t" // reverse sign of neg values
 
    "paeth_paa:                     \n\t"
-      "movl %%eax, _patemp         \n\t" // save pa for later use
+      "movl %%eax, " pa_TEMP "     \n\t" // save pa for later use
       // test if pa <= pb
       "cmpl %%ecx, %%eax           \n\t"
       "jna paeth_abb               \n\t"
       // pa > pb; now test if pb <= pc
-      "cmpl _pctemp, %%ecx         \n\t"
+      "cmpl " pc_TEMP ", %%ecx     \n\t"
       "jna paeth_bbc               \n\t"
       // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "movb (%1," PDX ",), %%cl    \n\t" // load Prior(x-bpp) into cl
       "jmp paeth_paeth             \n\t"
 
    "paeth_bbc:                     \n\t"
       // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
-      "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
+      "movb (%1," PBX ",), %%cl    \n\t" // load Prior(x) into cl
       "jmp paeth_paeth             \n\t"
 
    "paeth_abb:                     \n\t"
       // pa <= pb; now test if pa <= pc
-      "cmpl _pctemp, %%eax         \n\t"
+      "cmpl " pc_TEMP ", %%eax     \n\t"
       "jna paeth_abc               \n\t"
       // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "movb (%1," PDX ",), %%cl    \n\t" // load Prior(x-bpp) into cl
       "jmp paeth_paeth             \n\t"
 
    "paeth_abc:                     \n\t"
       // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
-      "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
+      "movb (%2," PDX ",), %%cl    \n\t" // load Raw(x-bpp) into cl
 
    "paeth_paeth:                   \n\t"
       "incl %%ebx                  \n\t"
       "incl %%edx                  \n\t"
       // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
-      "addb %%cl, -1(%%edi,%%ebx,) \n\t"
-      "cmpl _dif, %%ebx            \n\t"
+      "addb %%cl, -1(%2," PBX ",)  \n\t"
+      "cmpl %%ebp, %%ebx           \n\t"
       "jb paeth_lp1                \n\t"
 
+      RESTORE_r11_r12_r13
+
    "paeth_go:                      \n\t"
-      "movl _FullLength, %%ecx     \n\t"
+      RESTORE_FullLength "%%ecx    \n\t" // FullLength -> ecx
       "movl %%ecx, %%eax           \n\t"
       "subl %%ebx, %%eax           \n\t" // subtract alignment fix
       "andl $0x00000007, %%eax     \n\t" // calc bytes over mult of 8
       "subl %%eax, %%ecx           \n\t" // drop over bytes from original length
-      "movl %%ecx, _MMXLength      \n\t"
-#ifdef __PIC__
-      "popl %%ebx                  \n\t" // restore index to Global Offset Table
-#endif
+//out "movl %%ecx, MMXLength       \n\t"
+      "movl %%ebp, %%eax           \n\t" // ebp = diff, but no reg constraint(?)
+      RESTORE_ebp                        //  (could swap ebp and edx functions)
+      RESTORE_r15
+      RESTORE_GOT_ebx
 
-      : "=c" (dummy_value_c),            // output regs (dummy)
+      : "=c" (MMXLength),                // output regs
         "=S" (dummy_value_S),
-        "=D" (dummy_value_D)
+        "=D" (dummy_value_D),
+        "=a" (diff)
 
-      : "0" (bpp),       // ecx          // input regs
-        "1" (prev_row),  // esi
-        "2" (row)        // edi
+      : "0" (bpp),         // ecx        // input regs
+        "1" (prev_row),    // esi/rsi
+        "2" (row),         // edi/rdi
+        "3" (FullLength)   // eax
 
-      : "%eax", "%edx"                   // clobber list
-#ifndef __PIC__
-      , "%ebx"
-#endif
+      : "%edx"                           // clobber list
+        _CLOBBER_r11_r12_r13
+        _CLOBBER_r15
+        _CLOBBER_ebp
+        _CLOBBER_GOT_ebx
    );
 
    // now do the math for the rest of the row
@@ -3594,26 +4166,26 @@
    {
       case 3:
       {
-         _ActiveMask.use = 0x0000000000ffffffLL;
-         _ActiveMaskEnd.use = 0xffff000000000000LL;
-         _ShiftBpp.use = 24;    // == bpp(3) * 8
-         _ShiftRem.use = 40;    // == 64 - 24
+//       _ShiftBpp = 24;    // == bpp * 8
+//       _ShiftRem = 40;    // == 64 - _ShiftBpp
 
          __asm__ __volatile__ (
-            "movl _dif, %%ecx            \n\t"
-// preload  "movl row, %%edi             \n\t"
-// preload  "movl prev_row, %%esi        \n\t"
+            LOAD_GOT_rbp
+// preload  "movl diff, %%ecx            \n\t"
+// preload  "movl row, %1                \n\t" // edi/rdi
+// preload  "movl prev_row, %0           \n\t" // esi/rsi
             "pxor %%mm0, %%mm0           \n\t"
+
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t"
+            "movq -8(%1," PCX ",), %%mm1 \n\t"
          "paeth_3lp:                     \n\t"
-            "psrlq _ShiftRem, %%mm1      \n\t" // shift last 3 bytes to 1st
+            "psrlq $40, %%mm1            \n\t" // shift last 3 bytes to 1st
                                                // 3 bytes
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
             "punpcklbw %%mm0, %%mm1      \n\t" // unpack High bytes of a
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // prep c=Prior(x-bpp) bytes
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // prep c=Prior(x-bpp) bytes
             "punpcklbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
-            "psrlq _ShiftRem, %%mm3      \n\t" // shift last 3 bytes to 1st
+            "psrlq $40, %%mm3            \n\t" // shift last 3 bytes to 1st
                                                // 3 bytes
             // pav = p - a = (a + b - c) - a = b - c
             "movq %%mm2, %%mm4           \n\t"
@@ -3663,16 +4235,16 @@
             "paddw %%mm3, %%mm7          \n\t"
             "pxor %%mm0, %%mm0           \n\t"
             "packuswb %%mm1, %%mm7       \n\t"
-            "movq (%%esi,%%ecx,), %%mm3  \n\t" // load c=Prior(x-bpp)
-            "pand _ActiveMask, %%mm7     \n\t"
+            "movq (%0," PCX ",), %%mm3   \n\t" // load c=Prior(x-bpp)
+            "pand " AMASK5_3_0 ", %%mm7  \n\t" // _amask5_3_0 (was _ActiveMask)
             "movq %%mm3, %%mm2           \n\t" // load b=Prior(x) step 1
-            "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "paddb (%1," PCX ",), %%mm7  \n\t" // add Paeth predictor + Raw(x)
             "punpcklbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
-            "movq %%mm7, (%%edi,%%ecx,)  \n\t" // write back updated value
+            "movq %%mm7, (%1," PCX ",)   \n\t" // write back updated value
             "movq %%mm7, %%mm1           \n\t" // now mm1 will be used as
                                                // Raw(x-bpp)
             // now do Paeth for 2nd set of bytes (3-5)
-            "psrlq _ShiftBpp, %%mm2      \n\t" // load b=Prior(x) step 2
+            "psrlq $24, %%mm2            \n\t" // load b=Prior(x) step 2
             "punpcklbw %%mm0, %%mm1      \n\t" // unpack High bytes of a
             "pxor %%mm7, %%mm7           \n\t"
             "punpcklbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
@@ -3717,7 +4289,7 @@
             "paddw %%mm2, %%mm0          \n\t"
             //  test  ((pa <= pb)? pa:pb) <= pc
             "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
             "pand %%mm7, %%mm3           \n\t"
             "pandn %%mm0, %%mm7          \n\t"
             "pxor %%mm1, %%mm1           \n\t"
@@ -3725,18 +4297,18 @@
             "pxor %%mm0, %%mm0           \n\t"
             "packuswb %%mm1, %%mm7       \n\t"
             "movq %%mm2, %%mm3           \n\t" // load c=Prior(x-bpp) step 1
-            "pand _ActiveMask, %%mm7     \n\t"
+            "pand " AMASK5_3_0 ", %%mm7  \n\t" // _amask5_3_0 (was _ActiveMask)
             "punpckhbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
-            "psllq _ShiftBpp, %%mm7      \n\t" // shift bytes to 2nd group of
+            "psllq $24, %%mm7            \n\t" // shift bytes to 2nd group of
                                                // 3 bytes
              // pav = p - a = (a + b - c) - a = b - c
             "movq %%mm2, %%mm4           \n\t"
-            "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
-            "psllq _ShiftBpp, %%mm3      \n\t" // load c=Prior(x-bpp) step 2
-            "movq %%mm7, (%%edi,%%ecx,)  \n\t" // write back updated value
+            "paddb (%1," PCX ",), %%mm7  \n\t" // add Paeth predictor + Raw(x)
+            "psllq $24, %%mm3            \n\t" // load c=Prior(x-bpp) step 2
+            "movq %%mm7, (%1," PCX ",)   \n\t" // write back updated value
             "movq %%mm7, %%mm1           \n\t"
             "punpckhbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
-            "psllq _ShiftBpp, %%mm1      \n\t" // shift bytes
+            "psllq $24, %%mm1            \n\t" // shift bytes (was _ShiftBpp)
                                     // now mm1 will be used as Raw(x-bpp)
             // now do Paeth for 3rd, and final, set of bytes (6-7)
             "pxor %%mm7, %%mm7           \n\t"
@@ -3787,59 +4359,328 @@
             "packuswb %%mm7, %%mm1       \n\t"
             // step ecx to next set of 8 bytes and repeat loop til done
             "addl $8, %%ecx              \n\t"
-            "pand _ActiveMaskEnd, %%mm1  \n\t"
-            "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with
-                                                 // Raw(x)
-
-            "cmpl _MMXLength, %%ecx      \n\t"
+            "pand " AMASK0_2_6 ", %%mm1  \n\t" // _amask0_2_6 (_ActiveMaskEnd)
+            "paddb -8(%1," PCX ",), %%mm1 \n\t" // add Paeth predictor + Raw(x)
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
             "pxor %%mm0, %%mm0           \n\t" // pxor does not affect flags
-            "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value
+            "movq %%mm1, -8(%1," PCX ",) \n\t" // write back updated value
                                  // mm1 will be used as Raw(x-bpp) next loop
                            // mm3 ready to be used as Prior(x-bpp) next loop
             "jb paeth_3lp                \n\t"
+            RESTORE_rbp
 
-            : "=S" (dummy_value_S),             // output regs (dummy)
-              "=D" (dummy_value_D)
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi           // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),  // esi/rsi      // input regs
+              "1" (row),       // edi/rdi
+              "2" (diff),      // ecx
+              "3" (MMXLength)  // eax
 
-            : "%ecx"                            // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
       break;  // end 3 bpp
 
-      case 6:
-      //case 7:   // GRR BOGUS
-      //case 5:   // GRR BOGUS
+      case 4:
       {
-         _ActiveMask.use  = 0x00000000ffffffffLL;
-         _ActiveMask2.use = 0xffffffff00000000LL;
-         _ShiftBpp.use = bpp << 3;    // == bpp * 8
-         _ShiftRem.use = 64 - _ShiftBpp.use;
+         __asm__ __volatile__ (
+// preload  "movl diff, %%ecx            \n\t"
+// preload  "movl row, %1                \n\t" // edi/rdi
+// preload  "movl prev_row, %0           \n\t" // esi/rsi
+            "pxor %%mm0, %%mm0           \n\t"
+            // prime the pump:  load the first Raw(x-bpp) data set
+            "movq -8(%1," PCX ",), %%mm1 \n\t" // only time should need to read
+                                               //  a=Raw(x-bpp) bytes
+         "paeth_4lp:                     \n\t"
+            // do first set of 4 bytes
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "punpckhbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
+            "punpcklbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            "punpckhbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // only pav bytes < 0 in mm0
+            "pcmpgtw %%mm5, %%mm7        \n\t" // create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // only pbv bytes < 0 in mm7
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // only pcv bytes < 0 in mm0
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "packuswb %%mm1, %%mm7       \n\t"
+            "movq (%0," PCX ",), %%mm3   \n\t" // load c=Prior(x-bpp)
+            LOAD_GOT_rbp
+            "pand " AMASK4_4_0 ", %%mm7  \n\t" // _amask4_4_0 (was _ActiveMask)
+            RESTORE_rbp
+            "movq %%mm3, %%mm2           \n\t" // load b=Prior(x) step 1
+            "paddb (%1," PCX ",), %%mm7  \n\t" // add Paeth predictor + Raw(x)
+            "punpcklbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
+            "movq %%mm7, (%1," PCX ",)   \n\t" // write back updated value
+            "movq %%mm7, %%mm1           \n\t" // now mm1 will be used as
+                                               // Raw(x-bpp)
+            // do second set of 4 bytes
+            "punpckhbw %%mm0, %%mm2      \n\t" // unpack Low bytes of b
+            "punpcklbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
+            // pav = p - a = (a + b - c) - a = b - c
+            "movq %%mm2, %%mm4           \n\t"
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movq %%mm1, %%mm5           \n\t"
+            "psubw %%mm3, %%mm4          \n\t"
+            "pxor %%mm7, %%mm7           \n\t"
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "movq %%mm4, %%mm6           \n\t"
+            "psubw %%mm3, %%mm5          \n\t"
+            // pa = abs(p-a) = abs(pav)
+            // pb = abs(p-b) = abs(pbv)
+            // pc = abs(p-c) = abs(pcv)
+            "pcmpgtw %%mm4, %%mm0        \n\t" // create mask pav bytes < 0
+            "paddw %%mm5, %%mm6          \n\t"
+            "pand %%mm4, %%mm0           \n\t" // only pav bytes < 0 in mm0
+            "pcmpgtw %%mm5, %%mm7        \n\t" // create mask pbv bytes < 0
+            "psubw %%mm0, %%mm4          \n\t"
+            "pand %%mm5, %%mm7           \n\t" // only pbv bytes < 0 in mm7
+            "psubw %%mm0, %%mm4          \n\t"
+            "psubw %%mm7, %%mm5          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            "pcmpgtw %%mm6, %%mm0        \n\t" // create mask pcv bytes < 0
+            "pand %%mm6, %%mm0           \n\t" // only pcv bytes < 0 in mm0
+            "psubw %%mm7, %%mm5          \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            //  test pa <= pb
+            "movq %%mm4, %%mm7           \n\t"
+            "psubw %%mm0, %%mm6          \n\t"
+            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
+            "movq %%mm7, %%mm0           \n\t"
+            // use mm7 mask to merge pa & pb
+            "pand %%mm7, %%mm5           \n\t"
+            // use mm0 mask copy to merge a & b
+            "pand %%mm0, %%mm2           \n\t"
+            "pandn %%mm4, %%mm7          \n\t"
+            "pandn %%mm1, %%mm0          \n\t"
+            "paddw %%mm5, %%mm7          \n\t"
+            "paddw %%mm2, %%mm0          \n\t"
+            //  test  ((pa <= pb)? pa:pb) <= pc
+            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
+            "pxor %%mm1, %%mm1           \n\t"
+            "pand %%mm7, %%mm3           \n\t"
+            "pandn %%mm0, %%mm7          \n\t"
+            "pxor %%mm1, %%mm1           \n\t"
+            "paddw %%mm3, %%mm7          \n\t"
+            "pxor %%mm0, %%mm0           \n\t"
+            // step ecx to next set of 8 bytes and repeat loop til done
+            "addl $8, %%ecx              \n\t"
+            "packuswb %%mm7, %%mm1       \n\t"
+            "paddb -8(%1," PCX ",), %%mm1 \n\t" // add predictor with Raw(x)
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
+            "movq %%mm1, -8(%1," PCX ",) \n\t" // write back updated value
+                                 // mm1 will be used as Raw(x-bpp) next loop
+            "jb paeth_4lp                \n\t"
+
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
+
+            : "0" (prev_row),  // esi/rsi      // input regs
+              "1" (row),       // edi/rdi
+              "2" (diff),      // ecx
+              "3" (MMXLength)  // eax
+
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
+            , "%mm4", "%mm5", "%mm6", "%mm7"
+#endif
+         );
+      }
+      break;  // end 4 bpp
+
+      case 1:  // bpp == 1
+      case 2:  // bpp == 2
+      {
+         __asm__ __volatile__ (
+// preload  "movl diff, %%eax            \n\t" // eax: x = offset to align. bdry
+// preload  "movl FullLength, %%edx      \n\t"
+            "cmpl %%edx, %%eax           \n\t" // x (diff) vs. FullLength
+            "jnb paeth_dend              \n\t" // x >= FullLength: nothing to do
+
+            SAVE_ebp
+
+// preload  "movl row, %2                \n\t" // edi/rdi
+            // do Paeth decode for remaining bytes
+// preload  "movl prev_row, %1           \n\t" // esi/rsi
+            "movl %%eax, %%ebp           \n\t" // ebp = x
+// preload  "subl bpp, %%ebp             \n\t" // (bpp is preloaded into ecx)
+            "subl %%ecx, %%ebp           \n\t" // ebp = eax - bpp
+            "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx
+
+            SAVE_GOT_ebx
+            SAVE_r11_r12_r13
+
+         "paeth_dlp:                     \n\t"
+            "xorl %%ebx, %%ebx           \n\t" // clear ebx so bl is zero-extended
+            // pav = p - a = (a + b - c) - a = b - c
+            "movb (%1," PAX ",), %%bl    \n\t" // load Prior(x) into bl
+            "movb (%1," PBP ",), %%cl    \n\t" // load Prior(x-bpp) into cl
+            "subl %%ecx, %%ebx           \n\t" // subtract Prior(x-bpp)
+            "movl %%ebx, " pa_TEMP "     \n\t" // Save pav for later use
+            "xorl %%ebx, %%ebx           \n\t" // clear ebx so bl is zero-extended
+            // pbv = p - b = (a + b - c) - b = a - c
+            "movb (%2," PBP ",), %%bl    \n\t" // load Raw(x-bpp) into bl
+            "subl %%ecx, %%ebx           \n\t" // subtract Prior(x-bpp)
+            "movl %%ebx, %%ecx           \n\t" // ecx = pbv
+            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
+            "addl " pa_TEMP ", %%ebx     \n\t" // pcv = pav + pbv
+            // pc = abs(pcv)
+            "testl $0x80000000, %%ebx    \n\t" // sign bit set (pcv < 0)?
+            "jz paeth_dpca               \n\t" // no: already non-negative
+            "negl %%ebx                  \n\t" // reverse sign of neg values
+
+         "paeth_dpca:                    \n\t"
+            "movl %%ebx, " pc_TEMP "     \n\t" // save pc for later use
+            // pb = abs(pbv)
+            "testl $0x80000000, %%ecx    \n\t" // sign bit set (pbv < 0)?
+            "jz paeth_dpba               \n\t" // no: already non-negative
+            "negl %%ecx                  \n\t" // reverse sign of neg values
+
+         "paeth_dpba:                    \n\t"
+            "movl %%ecx, " pb_TEMP "     \n\t" // save pb for later use
+            // pa = abs(pav)
+            "movl " pa_TEMP ", %%ebx     \n\t" // reload pav
+            "testl $0x80000000, %%ebx    \n\t" // sign bit set (pav < 0)?
+            "jz paeth_dpaa               \n\t" // no: already non-negative
+            "negl %%ebx                  \n\t" // reverse sign of neg values
+
+         "paeth_dpaa:                    \n\t"
+            "movl %%ebx, " pa_TEMP "     \n\t" // save pa for later use
+            // test if pa <= pb
+            "cmpl %%ecx, %%ebx           \n\t" // ebx = pa, ecx = pb
+            "jna paeth_dabb              \n\t" // pa <= pb
+            // pa > pb; now test if pb <= pc
+            "cmpl " pc_TEMP ", %%ecx     \n\t" // ecx = pb vs. saved pc
+            "jna paeth_dbbc              \n\t" // pb <= pc
+            // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+            "movb (%1," PBP ",), %%cl    \n\t" // load Prior(x-bpp) into cl
+            "jmp paeth_dpaeth            \n\t"
+
+         "paeth_dbbc:                    \n\t"
+            // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
+            "movb (%1," PAX ",), %%cl    \n\t" // load Prior(x) into cl
+            "jmp paeth_dpaeth            \n\t"
+
+         "paeth_dabb:                    \n\t"
+            // pa <= pb; now test if pa <= pc
+            "cmpl " pc_TEMP ", %%ebx     \n\t" // ebx = pa vs. saved pc
+            "jna paeth_dabc              \n\t" // pa <= pc
+            // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
+            "movb (%1," PBP ",), %%cl   \n\t" // load Prior(x-bpp) into cl
+            "jmp paeth_dpaeth            \n\t"
+
+         "paeth_dabc:                    \n\t"
+            // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
+            "movb (%2," PBP ",), %%cl    \n\t" // load Raw(x-bpp) into cl
+
+         "paeth_dpaeth:                  \n\t"
+            "incl %%eax                  \n\t" // x++
+            "incl %%ebp                  \n\t" // (x-bpp)++
+            // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
+            "addb %%cl, -1(%2," PAX ",)  \n\t" // -1 undoes the incl of x above
+            "cmpl %%edx, %%eax           \n\t" // check against FullLength
+            "jb paeth_dlp                \n\t" // loop while x < FullLength
+
+            RESTORE_r11_r12_r13
+            RESTORE_GOT_ebx
+            RESTORE_ebp
+
+         "paeth_dend:                    \n\t"
+
+            : "=c" (dummy_value_c),            // output regs (dummy)
+              "=S" (dummy_value_S),
+              "=D" (dummy_value_D),
+              "=a" (dummy_value_a),
+              "=d" (dummy_value_d)
+
+            : "0" (bpp),         // ecx        // input regs
+              "1" (prev_row),    // esi/rsi
+              "2" (row),         // edi/rdi
+              "3" (diff),        // eax
+              "4" (FullLength)   // edx
+
+            CLOB_COLON_ebx_ebp_r1X             // clobber list
+              CLOBBER_GOT_ebx
+              CLOB_COMMA_ebx_ebp
+              CLOBBER_ebp
+              CLOB_COMMA_ebX_r1X
+              CLOBBER_r11_r12_r13
+         );
+      }
+      return; // end 1 or 2 bpp (no need to go further with this one)
+
+      case 6:
+      {
+//       _ActiveMask2 = 0xffffffff00000000LL;  // NOT USED ("_amask_0_4_4")
+//       _ShiftBpp = 48;       // bpp << 3 == bpp * 8
+//       _ShiftRem = 16;       // 64 - _ShiftBpp
 
          __asm__ __volatile__ (
-            "movl _dif, %%ecx            \n\t"
-// preload  "movl row, %%edi             \n\t"
-// preload  "movl prev_row, %%esi        \n\t"
+// preload  "movl diff, %%ecx            \n\t"
+// preload  "movl row, %1                \n\t" // edi/rdi
+// preload  "movl prev_row, %0           \n\t" // esi/rsi
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t"
+            "movq -8(%1," PCX ",), %%mm1 \n\t"
             "pxor %%mm0, %%mm0           \n\t"
 
          "paeth_6lp:                     \n\t"
             // must shift to position Raw(x-bpp) data
-            "psrlq _ShiftRem, %%mm1      \n\t"
+            "psrlq $16, %%mm1            \n\t" // was _ShiftRem
             // do first set of 4 bytes
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
             "punpcklbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
             "punpcklbw %%mm0, %%mm2      \n\t" // unpack Low bytes of b
             // must shift to position Prior(x-bpp) data
-            "psrlq _ShiftRem, %%mm3      \n\t"
+            "psrlq $16, %%mm3            \n\t" // was _ShiftRem
             // pav = p - a = (a + b - c) - a = b - c
             "movq %%mm2, %%mm4           \n\t"
             "punpcklbw %%mm0, %%mm3      \n\t" // unpack Low bytes of c
@@ -3887,19 +4728,21 @@
             "paddw %%mm3, %%mm7          \n\t"
             "pxor %%mm0, %%mm0           \n\t"
             "packuswb %%mm1, %%mm7       \n\t"
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // load c=Prior(x-bpp)
-            "pand _ActiveMask, %%mm7     \n\t"
-            "psrlq _ShiftRem, %%mm3      \n\t"
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x) step 1
-            "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor and Raw(x)
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // load c=Prior(x-bpp)
+            LOAD_GOT_rbp
+            "pand " AMASK4_4_0 ", %%mm7  \n\t" // _amask4_4_0 (was _ActiveMask)
+            RESTORE_rbp
+            "psrlq $16, %%mm3            \n\t"
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x) step 1
+            "paddb (%1," PCX ",), %%mm7  \n\t" // add Paeth predictor + Raw(x)
             "movq %%mm2, %%mm6           \n\t"
-            "movq %%mm7, (%%edi,%%ecx,)  \n\t" // write back updated value
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t"
-            "psllq _ShiftBpp, %%mm6      \n\t"
+            "movq %%mm7, (%1," PCX ",)   \n\t" // write back updated value
+            "movq -8(%1," PCX ",), %%mm1 \n\t"
+            "psllq $48, %%mm6            \n\t" // bpp * 8 = bits per pixel
             "movq %%mm7, %%mm5           \n\t"
-            "psrlq _ShiftRem, %%mm1      \n\t"
+            "psrlq $16, %%mm1            \n\t" // 64 - (bpp * 8) = remainder
             "por %%mm6, %%mm3            \n\t"
-            "psllq _ShiftBpp, %%mm5      \n\t"
+            "psllq $48, %%mm5            \n\t" // was _ShiftBpp
             "punpckhbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
             "por %%mm5, %%mm1            \n\t"
             // do second set of 4 bytes
@@ -3954,189 +4797,45 @@
             // step ecx to next set of 8 bytes and repeat loop til done
             "addl $8, %%ecx              \n\t"
             "packuswb %%mm7, %%mm1       \n\t"
-            "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
-            "cmpl _MMXLength, %%ecx      \n\t"
-            "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value
-                                // mm1 will be used as Raw(x-bpp) next loop
+            "paddb -8(%1," PCX ",), %%mm1 \n\t" // add Paeth predictor + Raw(x)
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
+            "movq %%mm1, -8(%1," PCX ",) \n\t" // write back updated value
+                                 // mm1 will be used as Raw(x-bpp) next loop
             "jb paeth_6lp                \n\t"
 
-            : "=S" (dummy_value_S),             // output regs (dummy)
-              "=D" (dummy_value_D)
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi           // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),  // esi/rsi      // input regs
+              "1" (row),       // edi/rdi
+              "2" (diff),      // ecx
+              "3" (MMXLength)  // eax
 
-            : "%ecx"                            // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
       break;  // end 6 bpp
 
-      case 4:
-      {
-         _ActiveMask.use  = 0x00000000ffffffffLL;
-
-         __asm__ __volatile__ (
-            "movl _dif, %%ecx            \n\t"
-// preload  "movl row, %%edi             \n\t"
-// preload  "movl prev_row, %%esi        \n\t"
-            "pxor %%mm0, %%mm0           \n\t"
-            // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // only time should need to read
-                                     //  a=Raw(x-bpp) bytes
-         "paeth_4lp:                     \n\t"
-            // do first set of 4 bytes
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
-            "punpckhbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
-            "punpcklbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
-            // pav = p - a = (a + b - c) - a = b - c
-            "movq %%mm2, %%mm4           \n\t"
-            "punpckhbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
-            // pbv = p - b = (a + b - c) - b = a - c
-            "movq %%mm1, %%mm5           \n\t"
-            "psubw %%mm3, %%mm4          \n\t"
-            "pxor %%mm7, %%mm7           \n\t"
-            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
-            "movq %%mm4, %%mm6           \n\t"
-            "psubw %%mm3, %%mm5          \n\t"
-            // pa = abs(p-a) = abs(pav)
-            // pb = abs(p-b) = abs(pbv)
-            // pc = abs(p-c) = abs(pcv)
-            "pcmpgtw %%mm4, %%mm0        \n\t" // create mask pav bytes < 0
-            "paddw %%mm5, %%mm6          \n\t"
-            "pand %%mm4, %%mm0           \n\t" // only pav bytes < 0 in mm7
-            "pcmpgtw %%mm5, %%mm7        \n\t" // create mask pbv bytes < 0
-            "psubw %%mm0, %%mm4          \n\t"
-            "pand %%mm5, %%mm7           \n\t" // only pbv bytes < 0 in mm0
-            "psubw %%mm0, %%mm4          \n\t"
-            "psubw %%mm7, %%mm5          \n\t"
-            "pxor %%mm0, %%mm0           \n\t"
-            "pcmpgtw %%mm6, %%mm0        \n\t" // create mask pcv bytes < 0
-            "pand %%mm6, %%mm0           \n\t" // only pav bytes < 0 in mm7
-            "psubw %%mm7, %%mm5          \n\t"
-            "psubw %%mm0, %%mm6          \n\t"
-            //  test pa <= pb
-            "movq %%mm4, %%mm7           \n\t"
-            "psubw %%mm0, %%mm6          \n\t"
-            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
-            "movq %%mm7, %%mm0           \n\t"
-            // use mm7 mask to merge pa & pb
-            "pand %%mm7, %%mm5           \n\t"
-            // use mm0 mask copy to merge a & b
-            "pand %%mm0, %%mm2           \n\t"
-            "pandn %%mm4, %%mm7          \n\t"
-            "pandn %%mm1, %%mm0          \n\t"
-            "paddw %%mm5, %%mm7          \n\t"
-            "paddw %%mm2, %%mm0          \n\t"
-            //  test  ((pa <= pb)? pa:pb) <= pc
-            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
-            "pxor %%mm1, %%mm1           \n\t"
-            "pand %%mm7, %%mm3           \n\t"
-            "pandn %%mm0, %%mm7          \n\t"
-            "paddw %%mm3, %%mm7          \n\t"
-            "pxor %%mm0, %%mm0           \n\t"
-            "packuswb %%mm1, %%mm7       \n\t"
-            "movq (%%esi,%%ecx,), %%mm3  \n\t" // load c=Prior(x-bpp)
-            "pand _ActiveMask, %%mm7     \n\t"
-            "movq %%mm3, %%mm2           \n\t" // load b=Prior(x) step 1
-            "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
-            "punpcklbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
-            "movq %%mm7, (%%edi,%%ecx,)  \n\t" // write back updated value
-            "movq %%mm7, %%mm1           \n\t" // now mm1 will be used as Raw(x-bpp)
-            // do second set of 4 bytes
-            "punpckhbw %%mm0, %%mm2      \n\t" // unpack Low bytes of b
-            "punpcklbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
-            // pav = p - a = (a + b - c) - a = b - c
-            "movq %%mm2, %%mm4           \n\t"
-            // pbv = p - b = (a + b - c) - b = a - c
-            "movq %%mm1, %%mm5           \n\t"
-            "psubw %%mm3, %%mm4          \n\t"
-            "pxor %%mm7, %%mm7           \n\t"
-            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
-            "movq %%mm4, %%mm6           \n\t"
-            "psubw %%mm3, %%mm5          \n\t"
-            // pa = abs(p-a) = abs(pav)
-            // pb = abs(p-b) = abs(pbv)
-            // pc = abs(p-c) = abs(pcv)
-            "pcmpgtw %%mm4, %%mm0        \n\t" // create mask pav bytes < 0
-            "paddw %%mm5, %%mm6          \n\t"
-            "pand %%mm4, %%mm0           \n\t" // only pav bytes < 0 in mm7
-            "pcmpgtw %%mm5, %%mm7        \n\t" // create mask pbv bytes < 0
-            "psubw %%mm0, %%mm4          \n\t"
-            "pand %%mm5, %%mm7           \n\t" // only pbv bytes < 0 in mm0
-            "psubw %%mm0, %%mm4          \n\t"
-            "psubw %%mm7, %%mm5          \n\t"
-            "pxor %%mm0, %%mm0           \n\t"
-            "pcmpgtw %%mm6, %%mm0        \n\t" // create mask pcv bytes < 0
-            "pand %%mm6, %%mm0           \n\t" // only pav bytes < 0 in mm7
-            "psubw %%mm7, %%mm5          \n\t"
-            "psubw %%mm0, %%mm6          \n\t"
-            //  test pa <= pb
-            "movq %%mm4, %%mm7           \n\t"
-            "psubw %%mm0, %%mm6          \n\t"
-            "pcmpgtw %%mm5, %%mm7        \n\t" // pa > pb?
-            "movq %%mm7, %%mm0           \n\t"
-            // use mm7 mask to merge pa & pb
-            "pand %%mm7, %%mm5           \n\t"
-            // use mm0 mask copy to merge a & b
-            "pand %%mm0, %%mm2           \n\t"
-            "pandn %%mm4, %%mm7          \n\t"
-            "pandn %%mm1, %%mm0          \n\t"
-            "paddw %%mm5, %%mm7          \n\t"
-            "paddw %%mm2, %%mm0          \n\t"
-            //  test  ((pa <= pb)? pa:pb) <= pc
-            "pcmpgtw %%mm6, %%mm7        \n\t" // pab > pc?
-            "pxor %%mm1, %%mm1           \n\t"
-            "pand %%mm7, %%mm3           \n\t"
-            "pandn %%mm0, %%mm7          \n\t"
-            "pxor %%mm1, %%mm1           \n\t"
-            "paddw %%mm3, %%mm7          \n\t"
-            "pxor %%mm0, %%mm0           \n\t"
-            // step ecx to next set of 8 bytes and repeat loop til done
-            "addl $8, %%ecx              \n\t"
-            "packuswb %%mm7, %%mm1       \n\t"
-            "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add predictor with Raw(x)
-            "cmpl _MMXLength, %%ecx      \n\t"
-            "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value
-                                // mm1 will be used as Raw(x-bpp) next loop
-            "jb paeth_4lp                \n\t"
-
-            : "=S" (dummy_value_S),             // output regs (dummy)
-              "=D" (dummy_value_D)
-
-            : "0" (prev_row),  // esi           // input regs
-              "1" (row)        // edi
-
-            : "%ecx"                            // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
-            , "%mm4", "%mm5", "%mm6", "%mm7"
-#endif
-         );
-      }
-      break;  // end 4 bpp
-
       case 8:                          // bpp == 8
       {
-         _ActiveMask.use  = 0x00000000ffffffffLL;
-
          __asm__ __volatile__ (
-            "movl _dif, %%ecx            \n\t"
-// preload  "movl row, %%edi             \n\t"
-// preload  "movl prev_row, %%esi        \n\t"
+// preload  "movl diff, %%ecx            \n\t"
+// preload  "movl row, %1                \n\t" // edi/rdi
+// preload  "movl prev_row, %0           \n\t" // esi/rsi
             "pxor %%mm0, %%mm0           \n\t"
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // only time should need to read
-                                       //  a=Raw(x-bpp) bytes
+            "movq -8(%1," PCX ",), %%mm1 \n\t" // only time should need to read
+                                               //  a=Raw(x-bpp) bytes
          "paeth_8lp:                     \n\t"
             // do first set of 4 bytes
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
             "punpcklbw %%mm0, %%mm1      \n\t" // unpack Low bytes of a
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
             "punpcklbw %%mm0, %%mm2      \n\t" // unpack Low bytes of b
             // pav = p - a = (a + b - c) - a = b - c
             "movq %%mm2, %%mm4           \n\t"
@@ -4185,13 +4884,15 @@
             "paddw %%mm3, %%mm7          \n\t"
             "pxor %%mm0, %%mm0           \n\t"
             "packuswb %%mm1, %%mm7       \n\t"
-            "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
-            "pand _ActiveMask, %%mm7     \n\t"
-            "movq (%%esi,%%ecx,), %%mm2  \n\t" // load b=Prior(x)
-            "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x)
+            "movq -8(%0," PCX ",), %%mm3 \n\t" // read c=Prior(x-bpp) bytes
+            LOAD_GOT_rbp
+            "pand " AMASK4_4_0 ", %%mm7  \n\t" // _amask4_4_0 (was _ActiveMask)
+            RESTORE_rbp
+            "movq (%0," PCX ",), %%mm2   \n\t" // load b=Prior(x)
+            "paddb (%1," PCX ",), %%mm7  \n\t" // add Paeth predictor + Raw(x)
             "punpckhbw %%mm0, %%mm3      \n\t" // unpack High bytes of c
-            "movq %%mm7, (%%edi,%%ecx,)  \n\t" // write back updated value
-            "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // read a=Raw(x-bpp) bytes
+            "movq %%mm7, (%1," PCX ",)   \n\t" // write back updated value
+            "movq -8(%1," PCX ",), %%mm1 \n\t" // read a=Raw(x-bpp) bytes
 
             // do second set of 4 bytes
             "punpckhbw %%mm0, %%mm2      \n\t" // unpack High bytes of b
@@ -4245,256 +4946,171 @@
             // step ecx to next set of 8 bytes and repeat loop til done
             "addl $8, %%ecx              \n\t"
             "packuswb %%mm7, %%mm1       \n\t"
-            "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with Raw(x)
-            "cmpl _MMXLength, %%ecx      \n\t"
-            "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value
-                            // mm1 will be used as Raw(x-bpp) next loop
+            "paddb -8(%1," PCX ",), %%mm1 \n\t" // add Paeth predictor + Raw(x)
+            "cmpl %%eax, %%ecx           \n\t" // MMXLength
+            "movq %%mm1, -8(%1," PCX ",) \n\t" // write back updated value
+                                 // mm1 will be used as Raw(x-bpp) next loop
             "jb paeth_8lp                \n\t"
 
-            : "=S" (dummy_value_S),             // output regs (dummy)
-              "=D" (dummy_value_D)
+            : "=S" (dummy_value_S),            // output regs (dummy)
+              "=D" (dummy_value_D),
+              "=c" (dummy_value_c),
+              "=a" (dummy_value_a)
 
-            : "0" (prev_row),  // esi           // input regs
-              "1" (row)        // edi
+            : "0" (prev_row),  // esi/rsi      // input regs
+              "1" (row),       // edi/rdi
+              "2" (diff),      // ecx
+              "3" (MMXLength)  // eax
 
-            : "%ecx"                            // clobber list
-#if 0  /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm2", "%mm3"   // clobber list
             , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
       break;  // end 8 bpp
 
-      case 1:                // bpp = 1
-      case 2:                // bpp = 2
-      default:               // bpp > 8
+      default:                // bpp != 1,2,3,4,6,8:  doesn't exist
       {
-         __asm__ __volatile__ (
-#ifdef __PIC__
-            "pushl %%ebx                 \n\t" // save Global Offset Table index
+         // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+         png_debug(1, "Internal libpng logic error (GCC "
+           "png_read_filter_row_mmx_paeth())\n");
 #endif
-            "movl _dif, %%ebx            \n\t"
-            "cmpl _FullLength, %%ebx     \n\t"
-            "jnb paeth_dend              \n\t"
-
-// preload  "movl row, %%edi             \n\t"
-// preload  "movl prev_row, %%esi        \n\t"
-            // do Paeth decode for remaining bytes
-            "movl %%ebx, %%edx           \n\t"
-// preload  "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
-            "subl %%ecx, %%edx           \n\t" // edx = ebx - bpp
-            "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx
-
-         "paeth_dlp:                     \n\t"
-            "xorl %%eax, %%eax           \n\t"
-            // pav = p - a = (a + b - c) - a = b - c
-            "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
-            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
-            "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
-            "movl %%eax, _patemp         \n\t" // Save pav for later use
-            "xorl %%eax, %%eax           \n\t"
-            // pbv = p - b = (a + b - c) - b = a - c
-            "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
-            "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
-            "movl %%eax, %%ecx           \n\t"
-            // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
-            "addl _patemp, %%eax         \n\t" // pcv = pav + pbv
-            // pc = abs(pcv)
-            "testl $0x80000000, %%eax    \n\t"
-            "jz paeth_dpca               \n\t"
-            "negl %%eax                  \n\t" // reverse sign of neg values
-
-         "paeth_dpca:                    \n\t"
-            "movl %%eax, _pctemp         \n\t" // save pc for later use
-            // pb = abs(pbv)
-            "testl $0x80000000, %%ecx    \n\t"
-            "jz paeth_dpba               \n\t"
-            "negl %%ecx                  \n\t" // reverse sign of neg values
-
-         "paeth_dpba:                    \n\t"
-            "movl %%ecx, _pbtemp         \n\t" // save pb for later use
-            // pa = abs(pav)
-            "movl _patemp, %%eax         \n\t"
-            "testl $0x80000000, %%eax    \n\t"
-            "jz paeth_dpaa               \n\t"
-            "negl %%eax                  \n\t" // reverse sign of neg values
-
-         "paeth_dpaa:                    \n\t"
-            "movl %%eax, _patemp         \n\t" // save pa for later use
-            // test if pa <= pb
-            "cmpl %%ecx, %%eax           \n\t"
-            "jna paeth_dabb              \n\t"
-            // pa > pb; now test if pb <= pc
-            "cmpl _pctemp, %%ecx         \n\t"
-            "jna paeth_dbbc              \n\t"
-            // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
-            "jmp paeth_dpaeth            \n\t"
-
-         "paeth_dbbc:                    \n\t"
-            // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
-            "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
-            "jmp paeth_dpaeth            \n\t"
-
-         "paeth_dabb:                    \n\t"
-            // pa <= pb; now test if pa <= pc
-            "cmpl _pctemp, %%eax         \n\t"
-            "jna paeth_dabc              \n\t"
-            // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-            "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
-            "jmp paeth_dpaeth            \n\t"
-
-         "paeth_dabc:                    \n\t"
-            // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
-            "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
-
-         "paeth_dpaeth:                  \n\t"
-            "incl %%ebx                  \n\t"
-            "incl %%edx                  \n\t"
-            // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
-            "addb %%cl, -1(%%edi,%%ebx,) \n\t"
-            "cmpl _FullLength, %%ebx     \n\t"
-            "jb paeth_dlp                \n\t"
-
-         "paeth_dend:                    \n\t"
-#ifdef __PIC__
-            "popl %%ebx                  \n\t" // index to Global Offset Table
-#endif
-
-            : "=c" (dummy_value_c),            // output regs (dummy)
-              "=S" (dummy_value_S),
-              "=D" (dummy_value_D)
-
-            : "0" (bpp),       // ecx          // input regs
-              "1" (prev_row),  // esi
-              "2" (row)        // edi
-
-            : "%eax", "%edx"                   // clobber list
-#ifndef __PIC__
-            , "%ebx"
-#endif
-         );
       }
-      return;                   // No need to go further with this one
+      break;
 
    } // end switch (bpp)
 
    __asm__ __volatile__ (
       // MMX acceleration complete; now do clean-up
       // check if any remaining bytes left to decode
-#ifdef __PIC__
-      "pushl %%ebx                 \n\t" // save index to Global Offset Table
-#endif
-      "movl _MMXLength, %%ebx      \n\t"
-      "cmpl _FullLength, %%ebx     \n\t"
+//pre "movl FullLength, %%edx      \n\t"
+//pre "movl MMXLength, %%eax       \n\t"
+      "cmpl %%edx, %%eax           \n\t"
       "jnb paeth_end               \n\t"
-//pre "movl row, %%edi             \n\t"
-//pre "movl prev_row, %%esi        \n\t"
+
+      SAVE_ebp
+
+//pre "movl row, %2                \n\t" // edi/rdi
+//pre "movl prev_row, %1           \n\t" // esi/rsi
       // do Paeth decode for remaining bytes
-      "movl %%ebx, %%edx           \n\t"
-//pre "subl bpp, %%edx             \n\t" // (bpp is preloaded into ecx)
-      "subl %%ecx, %%edx           \n\t" // edx = ebx - bpp
+      "movl %%eax, %%ebp           \n\t"
+//pre "subl bpp, %%ebp             \n\t" // (bpp is preloaded into ecx)
+      "subl %%ecx, %%ebp           \n\t" // ebp = eax - bpp
       "xorl %%ecx, %%ecx           \n\t" // zero ecx before using cl & cx below
 
+      SAVE_GOT_ebx
+      SAVE_r11_r12_r13
+
    "paeth_lp2:                     \n\t"
-      "xorl %%eax, %%eax           \n\t"
+      "xorl %%ebx, %%ebx           \n\t"
       // pav = p - a = (a + b - c) - a = b - c
-      "movb (%%esi,%%ebx,), %%al   \n\t" // load Prior(x) into al
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
-      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
-      "movl %%eax, _patemp         \n\t" // Save pav for later use
-      "xorl %%eax, %%eax           \n\t"
+      "movb (%1," PAX ",), %%bl    \n\t" // load Prior(x) into bl
+      "movb (%1," PBP ",), %%cl    \n\t" // load Prior(x-bpp) into cl
+      "subl %%ecx, %%ebx           \n\t" // subtract Prior(x-bpp)
+      "movl %%ebx, " pa_TEMP "     \n\t" // Save pav for later use
+      "xorl %%ebx, %%ebx           \n\t"
       // pbv = p - b = (a + b - c) - b = a - c
-      "movb (%%edi,%%edx,), %%al   \n\t" // load Raw(x-bpp) into al
-      "subl %%ecx, %%eax           \n\t" // subtract Prior(x-bpp)
-      "movl %%eax, %%ecx           \n\t"
-      // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv
-      "addl _patemp, %%eax         \n\t" // pcv = pav + pbv
+      "movb (%2," PBP ",), %%bl    \n\t" // load Raw(x-bpp) into bl
+      "subl %%ecx, %%ebx           \n\t" // subtract Prior(x-bpp)
+      "movl %%ebx, %%ecx           \n\t"
+      // pcv = p - c = (a + b - c) - c = (a - c) + (b - c) = pav + pbv
+      "addl " pa_TEMP ", %%ebx     \n\t" // pcv = pav + pbv
       // pc = abs(pcv)
-      "testl $0x80000000, %%eax    \n\t"
+      "testl $0x80000000, %%ebx    \n\t"
       "jz paeth_pca2               \n\t"
-      "negl %%eax                  \n\t" // reverse sign of neg values
+      "negl %%ebx                  \n\t" // reverse sign of neg values
 
    "paeth_pca2:                    \n\t"
-      "movl %%eax, _pctemp         \n\t" // save pc for later use
+      "movl %%ebx, " pc_TEMP "     \n\t" // save pc for later use
       // pb = abs(pbv)
       "testl $0x80000000, %%ecx    \n\t"
       "jz paeth_pba2               \n\t"
       "negl %%ecx                  \n\t" // reverse sign of neg values
 
    "paeth_pba2:                    \n\t"
-      "movl %%ecx, _pbtemp         \n\t" // save pb for later use
+      "movl %%ecx, " pb_TEMP "     \n\t" // save pb for later use
       // pa = abs(pav)
-      "movl _patemp, %%eax         \n\t"
-      "testl $0x80000000, %%eax    \n\t"
+      "movl " pa_TEMP ", %%ebx     \n\t"
+      "testl $0x80000000, %%ebx    \n\t"
       "jz paeth_paa2               \n\t"
-      "negl %%eax                  \n\t" // reverse sign of neg values
+      "negl %%ebx                  \n\t" // reverse sign of neg values
 
    "paeth_paa2:                    \n\t"
-      "movl %%eax, _patemp         \n\t" // save pa for later use
+      "movl %%ebx, " pa_TEMP "     \n\t" // save pa for later use
       // test if pa <= pb
-      "cmpl %%ecx, %%eax           \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
       "jna paeth_abb2              \n\t"
       // pa > pb; now test if pb <= pc
-      "cmpl _pctemp, %%ecx         \n\t"
+      "cmpl " pc_TEMP ", %%ecx     \n\t"
       "jna paeth_bbc2              \n\t"
       // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "movb (%1," PBP ",), %%cl    \n\t" // load Prior(x-bpp) into cl
       "jmp paeth_paeth2            \n\t"
 
    "paeth_bbc2:                    \n\t"
       // pb <= pc; Raw(x) = Paeth(x) + Prior(x)
-      "movb (%%esi,%%ebx,), %%cl   \n\t" // load Prior(x) into cl
+      "movb (%1," PAX ",), %%cl    \n\t" // load Prior(x) into cl
       "jmp paeth_paeth2            \n\t"
 
    "paeth_abb2:                    \n\t"
       // pa <= pb; now test if pa <= pc
-      "cmpl _pctemp, %%eax         \n\t"
+      "cmpl " pc_TEMP ", %%ebx     \n\t"
       "jna paeth_abc2              \n\t"
       // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp)
-      "movb (%%esi,%%edx,), %%cl   \n\t" // load Prior(x-bpp) into cl
+      "movb (%1," PBP ",), %%cl    \n\t" // load Prior(x-bpp) into cl
       "jmp paeth_paeth2            \n\t"
 
    "paeth_abc2:                    \n\t"
       // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp)
-      "movb (%%edi,%%edx,), %%cl   \n\t" // load Raw(x-bpp) into cl
+      "movb (%2," PBP ",), %%cl    \n\t" // load Raw(x-bpp) into cl
 
    "paeth_paeth2:                  \n\t"
-      "incl %%ebx                  \n\t"
-      "incl %%edx                  \n\t"
+      "incl %%eax                  \n\t"
+      "incl %%ebp                  \n\t"
       // Raw(x) = (Paeth(x) + Paeth_Predictor( a, b, c )) mod 256
-      "addb %%cl, -1(%%edi,%%ebx,) \n\t"
-      "cmpl _FullLength, %%ebx     \n\t"
+      "addb %%cl, -1(%2," PAX ",)  \n\t"
+      "cmpl %%edx, %%eax           \n\t" // check against FullLength
       "jb paeth_lp2                \n\t"
 
+      RESTORE_r11_r12_r13
+      RESTORE_GOT_ebx
+      RESTORE_ebp
+
    "paeth_end:                     \n\t"
       "EMMS                        \n\t" // end MMX; prep for poss. FP instrs.
-#ifdef __PIC__
-      "popl %%ebx                  \n\t" // restore index to Global Offset Table
-#endif
 
       : "=c" (dummy_value_c),            // output regs (dummy)
         "=S" (dummy_value_S),
-        "=D" (dummy_value_D)
+        "=D" (dummy_value_D),
+        "=a" (dummy_value_a),
+        "=d" (dummy_value_d)
 
-      : "0" (bpp),       // ecx          // input regs
-        "1" (prev_row),  // esi
-        "2" (row)        // edi
+      : "0" (bpp),         // ecx        // input regs
+        "1" (prev_row),    // esi/rsi
+        "2" (row),         // edi/rdi
+        "3" (MMXLength),   // eax
+        "4" (FullLength)   // edx
 
-      : "%eax", "%edx"                   // clobber list (no input regs!)
-#ifndef __PIC__
-      , "%ebx"
-#endif
+      CLOB_COLON_ebx_ebp_r1X             // clobber list
+        CLOBBER_GOT_ebx
+        CLOB_COMMA_ebx_ebp
+        CLOBBER_ebp
+        CLOB_COMMA_ebX_r1X
+        CLOBBER_r11_r12_r13
    );
 
 } /* end png_read_filter_row_mmx_paeth() */
-#endif
+
+#endif // PNG_x86_64_USE_GOTPCREL || PNG_THREAD_UNSAFE_OK
+#endif /* PNG_MMX_READ_FILTER_PAETH_SUPPORTED */
 
 
 
 
-#ifdef PNG_THREAD_UNSAFE_OK
+#if defined(PNG_MMX_READ_FILTER_SUB_SUPPORTED)
+
 //===========================================================================//
 //                                                                           //
 //           P N G _ R E A D _ F I L T E R _ R O W _ M M X _ S U B           //
@@ -4506,55 +5122,68 @@
 static void /* PRIVATE */
 png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)
 {
+   unsigned FullLength, MMXLength;  // png_uint_32 is actually 64-bit on x86-64
    int bpp;
    int dummy_value_a;
-   int dummy_value_D;
+   int dummy_value_c;
+   int dummy_value_d;
+   png_bytep dummy_value_D;
+   int diff; //     __attribute__((used));
 
-   bpp = (row_info->pixel_depth + 7) >> 3;   // calc number of bytes per pixel
-   _FullLength = row_info->rowbytes - bpp;   // number of bytes to filter
+   bpp = (row_info->pixel_depth + 7) >> 3;  // calc number of bytes per pixel
+   FullLength = row_info->rowbytes - bpp;   // number of bytes to filter
+     // (bpp subtracted, unlike avg/paeth: offsets here are relative to row + bpp)
 
    __asm__ __volatile__ (
-//pre "movl row, %%edi             \n\t"
-      "movl %%edi, %%esi           \n\t" // lp = row
-//pre "movl bpp, %%eax             \n\t"
-      "addl %%eax, %%edi           \n\t" // rp = row + bpp
-//irr "xorl %%eax, %%eax           \n\t"
-      // get # of bytes to alignment
-      "movl %%edi, _dif            \n\t" // take start of row
-      "addl $0xf, _dif             \n\t" // add 7 + 8 to incr past
-                                         //  alignment boundary
-      "xorl %%ecx, %%ecx           \n\t"
-      "andl $0xfffffff8, _dif      \n\t" // mask to alignment boundary
-      "subl %%edi, _dif            \n\t" // subtract from start ==> value
-      "jz sub_go                   \n\t" //  ecx at alignment
+      SAVE_r15
+      SAVE_ebp
+//pre "movl row, %1                \n\t" // edi/rdi
+      "mov  %1, " PSI "            \n\t" // lp = row
+//pre "movl bpp, %%ecx             \n\t"
+      "add  " PCX ", %1            \n\t" // rp = row + bpp
+//pre "movl FullLength, %%eax      \n\t" // bring in via eax...
+      SAVE_FullLength                    // ...but store for later use
+
+      "xorl %%eax, %%eax           \n\t"
+
+      // get # of bytes to alignment (note:  computing _delta_ of two pointers,
+      // so hereafter %%ebp is sufficient even on 64-bit)
+      "mov  %1, " PBP "            \n\t" // take start of row
+      "add  $0xf, " PBP "          \n\t" // add 7+8 to incr past alignment bdry
+//    "andl $0xfffffff8, %%ebp     \n\t" // mask to alignment boundary (32-bit!)
+      CLEAR_BOTTOM_3_BITS  PBP    "\n\t" // mask to alignment boundary
+      "sub  %1, " PBP "            \n\t" // subtract row ptr again => ebp =
+      "jz sub_go                   \n\t" //  target value of eax at alignment
 
    "sub_lp1:                       \n\t" // fix alignment
-      "movb (%%esi,%%ecx,), %%al   \n\t"
-      "addb %%al, (%%edi,%%ecx,)   \n\t"
-      "incl %%ecx                  \n\t"
-      "cmpl _dif, %%ecx            \n\t"
+      "movb (" PSI "," PAX ",), %%cl \n\t"
+      "addb %%cl, (%1," PAX ",)    \n\t"
+      "incl %%eax                  \n\t"
+      "cmpl %%ebp, %%eax           \n\t"
       "jb sub_lp1                  \n\t"
 
    "sub_go:                        \n\t"
-      "movl _FullLength, %%eax     \n\t"
-      "movl %%eax, %%edx           \n\t"
-      "subl %%ecx, %%edx           \n\t" // subtract alignment fix
+      RESTORE_FullLength "%%ecx    \n\t" // FullLength -> ecx
+      "movl %%ecx, %%edx           \n\t"
+      "subl %%eax, %%edx           \n\t" // subtract alignment fix
       "andl $0x00000007, %%edx     \n\t" // calc bytes over mult of 8
-      "subl %%edx, %%eax           \n\t" // drop over bytes from length
-      "movl %%eax, _MMXLength      \n\t"
+      "subl %%edx, %%ecx           \n\t" // drop over bytes from length
+//out "movl %%ecx, MMXLength       \n\t"
+      "movl %%ebp, %%eax           \n\t" // ebp = diff, but no reg constraint(?)
+      RESTORE_ebp                        //  (could swap ebp and ecx functions,
+      RESTORE_r15                        //  but %%cl issues...)
 
-      : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-        "=D" (dummy_value_D)    // 1
+      : "=c" (MMXLength),       // 0     // output regs
+        "=D" (dummy_value_D),   // 1
+        "=a" (diff)             // 2
 
-      : "0" (bpp),              // eax    // input regs
-        "1" (row)               // edi
+      : "0" (bpp),              // ecx   // input regs
+        "1" (row),              // edi
+        "2" (FullLength)        // eax
 
-      : "%esi", "%ecx", "%edx"            // clobber list
-
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-      , "%mm0", "%mm1", "%mm2", "%mm3"
-      , "%mm4", "%mm5", "%mm6", "%mm7"
-#endif
+      : "%esi", "%edx"                   // clobber list
+        _CLOBBER_r15
+        _CLOBBER_ebp
    );
 
    // now do the math for the rest of the row
@@ -4562,230 +5191,296 @@
    {
       case 3:
       {
-         _ActiveMask.use  = 0x0000ffffff000000LL;
-         _ShiftBpp.use = 24;       // == 3 * 8
-         _ShiftRem.use  = 40;      // == 64 - 24
+//       _ShiftBpp = 24;       // == 3 * 8
+//       _ShiftRem  = 40;      // == 64 - 24
 
          __asm__ __volatile__ (
-// preload  "movl row, %%edi              \n\t"
-            "movq _ActiveMask, %%mm7       \n\t" // load _ActiveMask for 2nd
-                                                //  active byte group
-            "movl %%edi, %%esi            \n\t" // lp = row
-// preload  "movl bpp, %%eax              \n\t"
-            "addl %%eax, %%edi            \n\t" // rp = row + bpp
+// preload  "mov  row, %1                 \n\t" // edi/rdi
+            LOAD_GOT_rbp
+            // load (former) _ActiveMask for 2nd active byte group
+            "movq " AMASK2_3_3 ", %%mm7   \n\t" // _amask2_3_3
+            RESTORE_rbp
+
+// notused  "mov  %1, " PSI "             \n\t" // lp = row
+// preload  "movl bpp, %%ecx              \n\t"
+            "add  " PCX ", %1             \n\t" // rp = row + bpp
             "movq %%mm7, %%mm6            \n\t"
-            "movl _dif, %%edx             \n\t"
-            "psllq _ShiftBpp, %%mm6       \n\t" // move mask in mm6 to cover
+// preload  "movl diff, %%edx             \n\t"
+            "psllq $24, %%mm6             \n\t" // move mask in mm6 to cover
                                                 //  3rd active byte group
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%edx,), %%mm1 \n\t"
+            "movq -8(%1," PDX ",), %%mm1  \n\t"
 
          "sub_3lp:                        \n\t" // shift data for adding first
-            "psrlq _ShiftRem, %%mm1       \n\t" //  bpp bytes (no need for mask;
+            "psrlq $40, %%mm1             \n\t" //  bpp bytes (no need for mask;
                                                 //  shift clears inactive bytes)
             // add 1st active group
-            "movq (%%edi,%%edx,), %%mm0   \n\t"
+            "movq (%1," PDX ",), %%mm0    \n\t"
             "paddb %%mm1, %%mm0           \n\t"
 
             // add 2nd active group
             "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
+            "psllq $24, %%mm1             \n\t" // shift data to pos. correctly
             "pand %%mm7, %%mm1            \n\t" // mask to use 2nd active group
             "paddb %%mm1, %%mm0           \n\t"
 
             // add 3rd active group
             "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
+            "psllq $24, %%mm1             \n\t" // shift data to pos. correctly
             "pand %%mm6, %%mm1            \n\t" // mask to use 3rd active group
             "addl $8, %%edx               \n\t"
             "paddb %%mm1, %%mm0           \n\t"
 
-            "cmpl _MMXLength, %%edx       \n\t"
-            "movq %%mm0, -8(%%edi,%%edx,) \n\t" // write updated Raws to array
+            "cmpl %%eax, %%edx            \n\t" // MMXLength
+            "movq %%mm0, -8(%1," PDX ",)  \n\t" // write updated Raws to array
             "movq %%mm0, %%mm1            \n\t" // prep 1st add at top of loop
             "jb sub_3lp                   \n\t"
 
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
 
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (MMXLength)         // eax
 
-            : "%edx", "%esi"                    // clobber list
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm6", "%mm7"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm6", "%mm7"    // clobber list
 #endif
          );
       }
-      break;
+      break;  // end 3 bpp
+
+      case 4:   // formerly shared with 6 bpp case via _ShiftBpp and _ShiftRem,
+      {         // but 64-bit PIC/.so problems (could still share, moving vars
+                // into unused MMX regs via ecx/edx, but kludgy)
+//       _ShiftBpp = bpp << 3;        // 32 (psllq)
+//       _ShiftRem = 64 - _ShiftBpp;  // 32 (psrlq)
+
+         __asm__ __volatile__ (
+// preload  "mov  row, %1                 \n\t" // edi/rdi
+// preload  "movl diff, %%edx             \n\t"
+// notused  "mov  %1, " PSI "             \n\t" // lp = row
+// preload  "movl bpp, %%ecx              \n\t"
+            "add  " PCX ", %1             \n\t" // rp = row + bpp
+
+            // prime the pump:  load the first Raw(x-bpp) data set
+            "movq -8(%1," PDX ",), %%mm1  \n\t"
+
+         "sub_4lp:                        \n\t" // shift data for adding first
+            "psrlq $32, %%mm1             \n\t" //  bpp bytes (no need for mask;
+                                                //  shift clears inactive bytes)
+            "movq (%1," PDX ",), %%mm0    \n\t"
+            "paddb %%mm1, %%mm0           \n\t"
+
+            // add 2nd active group
+            "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
+            "psllq $32, %%mm1             \n\t" // shift data to pos. correctly
+            "addl $8, %%edx               \n\t"
+            "paddb %%mm1, %%mm0           \n\t"
+
+            "cmpl %%eax, %%edx            \n\t" // MMXLength
+            "movq %%mm0, -8(%1," PDX ",)  \n\t" // write updated Raws to array
+            "movq %%mm0, %%mm1            \n\t" // prep 1st add at top of loop
+            "jb sub_4lp                   \n\t"
+
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
+
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (MMXLength)         // eax
+
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1"                    // clobber list
+#endif
+         );
+      }
+      break;  // end 4 bpp
 
       case 1:
       {
          __asm__ __volatile__ (
-            "movl _dif, %%edx            \n\t"
-// preload  "movl row, %%edi             \n\t"
-            "cmpl _FullLength, %%edx     \n\t"
-            "jnb sub_1end                \n\t"
-            "movl %%edi, %%esi           \n\t" // lp = row
-            "xorl %%eax, %%eax           \n\t"
-// preload  "movl bpp, %%eax             \n\t"
-            "addl %%eax, %%edi           \n\t" // rp = row + bpp
+// preload  "movl diff, %%edx              \n\t"
+// preload  "mov  row, %1                  \n\t" // edi/rdi
+// preload  "cmpl FullLength, %%edx        \n\t"
+            "cmpl %%eax, %%edx             \n\t"
+            "jnb sub_1end                  \n\t"
+            "mov  %1, " PSI "              \n\t" // lp = row
+// irrel.   "xorl %%ecx, %%ecx             \n\t" // (actually bug with preload)
+// preload  "movl bpp, %%ecx               \n\t"
+            "add  " PCX ", %1              \n\t" // rp = row + bpp
 
-         "sub_1lp:                       \n\t"
-            "movb (%%esi,%%edx,), %%al   \n\t"
-            "addb %%al, (%%edi,%%edx,)   \n\t"
-            "incl %%edx                  \n\t"
-            "cmpl _FullLength, %%edx     \n\t"
-            "jb sub_1lp                  \n\t"
+         "sub_1lp:                         \n\t"
+            "movb (" PSI "," PDX ",), %%cl \n\t"
+            "addb %%cl, (%1," PDX ",)      \n\t"
+            "incl %%edx                    \n\t"
+            "cmpl %%eax, %%edx             \n\t" // compare with FullLength
+            "jb sub_1lp                    \n\t"
 
-         "sub_1end:                      \n\t"
+         "sub_1end:                        \n\t"
 
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
 
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (FullLength)        // eax
 
-            : "%edx", "%esi"                    // clobber list
+            : "%esi"                            // clobber list
          );
       }
-      return;
-
-      case 6:
-      case 4:
-      //case 7:   // GRR BOGUS
-      //case 5:   // GRR BOGUS
-      {
-         _ShiftBpp.use = bpp << 3;
-         _ShiftRem.use = 64 - _ShiftBpp.use;
-
-         __asm__ __volatile__ (
-// preload  "movl row, %%edi              \n\t"
-            "movl _dif, %%edx             \n\t"
-            "movl %%edi, %%esi            \n\t" // lp = row
-// preload  "movl bpp, %%eax              \n\t"
-            "addl %%eax, %%edi            \n\t" // rp = row + bpp
-
-            // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%edx,), %%mm1 \n\t"
-
-         "sub_4lp:                        \n\t" // shift data for adding first
-            "psrlq _ShiftRem, %%mm1       \n\t" //  bpp bytes (no need for mask;
-                                                //  shift clears inactive bytes)
-            "movq (%%edi,%%edx,), %%mm0   \n\t"
-            "paddb %%mm1, %%mm0           \n\t"
-
-            // add 2nd active group
-            "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
-            "addl $8, %%edx               \n\t"
-            "paddb %%mm1, %%mm0           \n\t"
-
-            "cmpl _MMXLength, %%edx       \n\t"
-            "movq %%mm0, -8(%%edi,%%edx,) \n\t"
-            "movq %%mm0, %%mm1            \n\t" // prep 1st add at top of loop
-            "jb sub_4lp                   \n\t"
-
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
-
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
-
-            : "%edx", "%esi"                    // clobber list
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1"
-#endif
-         );
-      }
-      break;
+      return;  // end 1 bpp (bypassing cleanup block!)
 
       case 2:
       {
-         _ActiveMask.use = 0x00000000ffff0000LL;
-         _ShiftBpp.use = 16;       // == 2 * 8
-         _ShiftRem.use = 48;       // == 64 - 16
+//       _ShiftBpp = 16;       // == 2 * 8
+//       _ShiftRem = 48;       // == 64 - 16
 
          __asm__ __volatile__ (
-            "movq _ActiveMask, %%mm7      \n\t" // load _ActiveMask for 2nd
-                                                //  active byte group
-            "movl _dif, %%edx             \n\t"
+            LOAD_GOT_rbp
+            // load (former) _ActiveMask for 2nd active byte group
+            "movq " AMASK4_2_2 ", %%mm7   \n\t" // _amask4_2_2
+            RESTORE_rbp
+// preload  "movl diff, %%edx             \n\t"
             "movq %%mm7, %%mm6            \n\t"
-// preload  "movl row, %%edi              \n\t"
-            "psllq _ShiftBpp, %%mm6       \n\t" // move mask in mm6 to cover
+// preload  "mov  row, %1                 \n\t" // edi/rdi
+            "psllq $16, %%mm6             \n\t" // move mask in mm6 to cover
                                                 //  3rd active byte group
-            "movl %%edi, %%esi            \n\t" // lp = row
+// notused  "mov  %1, " PSI "             \n\t" // lp = row
             "movq %%mm6, %%mm5            \n\t"
-// preload  "movl bpp, %%eax              \n\t"
-            "addl %%eax, %%edi            \n\t" // rp = row + bpp
-            "psllq _ShiftBpp, %%mm5       \n\t" // move mask in mm5 to cover
+// preload  "movl bpp, %%ecx              \n\t"
+            "add  " PCX ", %1             \n\t" // rp = row + bpp
+            "psllq $16, %%mm5             \n\t" // move mask in mm5 to cover
                                                 //  4th active byte group
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%edx,), %%mm1 \n\t"
+            "movq -8(%1," PDX ",), %%mm1  \n\t"
 
          "sub_2lp:                        \n\t" // shift data for adding first
-            "psrlq _ShiftRem, %%mm1       \n\t" //  bpp bytes (no need for mask;
+            "psrlq $48, %%mm1             \n\t" //  bpp bytes (no need for mask;
                                                 //  shift clears inactive bytes)
             // add 1st active group
-            "movq (%%edi,%%edx,), %%mm0   \n\t"
+            "movq (%1," PDX ",), %%mm0    \n\t"
             "paddb %%mm1, %%mm0           \n\t"
 
             // add 2nd active group
             "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm1             \n\t" // shift data to pos. correctly
             "pand %%mm7, %%mm1            \n\t" // mask to use 2nd active group
             "paddb %%mm1, %%mm0           \n\t"
 
             // add 3rd active group
             "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm1             \n\t" // shift data to pos. correctly
             "pand %%mm6, %%mm1            \n\t" // mask to use 3rd active group
             "paddb %%mm1, %%mm0           \n\t"
 
             // add 4th active group
             "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
-            "psllq _ShiftBpp, %%mm1       \n\t" // shift data to pos. correctly
+            "psllq $16, %%mm1             \n\t" // shift data to pos. correctly
             "pand %%mm5, %%mm1            \n\t" // mask to use 4th active group
             "addl $8, %%edx               \n\t"
             "paddb %%mm1, %%mm0           \n\t"
-            "cmpl _MMXLength, %%edx       \n\t"
-            "movq %%mm0, -8(%%edi,%%edx,) \n\t" // write updated Raws to array
+            "cmpl %%eax, %%edx            \n\t" // MMXLength
+            "movq %%mm0, -8(%1," PDX ",)  \n\t" // write updated Raws to array
             "movq %%mm0, %%mm1            \n\t" // prep 1st add at top of loop
             "jb sub_2lp                   \n\t"
 
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
 
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (MMXLength)         // eax
 
-            : "%edx", "%esi"                    // clobber list
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm5", "%mm6", "%mm7"
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1", "%mm5", "%mm6"    // clobber list
+            , "%mm7"
 #endif
          );
       }
-      break;
+      break;  // end 2 bpp
+
+      case 6:   // formerly shared with 4 bpp case (see comments there)
+      {
+//       _ShiftBpp = bpp << 3;        // 48 (psllq)
+//       _ShiftRem = 64 - _ShiftBpp;  // 16 (psrlq)
+
+         __asm__ __volatile__ (
+// preload  "mov  row, %1                 \n\t" // edi/rdi
+// preload  "movl diff, %%edx             \n\t"
+// notused  "mov  %1, " PSI "             \n\t" // lp = row
+// preload  "movl bpp, %%ecx              \n\t"
+            "add  " PCX ", %1             \n\t" // rp = row + bpp
+
+            // prime the pump:  load the first Raw(x-bpp) data set
+            "movq -8(%1," PDX ",), %%mm1  \n\t"
+
+         "sub_6lp:                        \n\t" // shift data for adding first
+            "psrlq $16, %%mm1             \n\t" //  bpp bytes (no need for mask;
+                                                //  shift clears inactive bytes)
+            "movq (%1," PDX ",), %%mm0    \n\t"
+            "paddb %%mm1, %%mm0           \n\t"
+
+            // add 2nd active group
+            "movq %%mm0, %%mm1            \n\t" // mov updated Raws to mm1
+            "psllq $48, %%mm1             \n\t" // shift data to pos. correctly
+            "addl $8, %%edx               \n\t"
+            "paddb %%mm1, %%mm0           \n\t"
+
+            "cmpl %%eax, %%edx            \n\t" // MMXLength
+            "movq %%mm0, -8(%1," PDX ",)  \n\t" // write updated Raws to array
+            "movq %%mm0, %%mm1            \n\t" // prep 1st add at top of loop
+            "jb sub_6lp                   \n\t"
+
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
+
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (MMXLength)         // eax
+
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            : "%mm0", "%mm1"                    // clobber list
+#endif
+         );
+      }
+      break;  // end 6 bpp
 
       case 8:
       {
          __asm__ __volatile__ (
-// preload  "movl row, %%edi              \n\t"
-            "movl _dif, %%edx             \n\t"
-            "movl %%edi, %%esi            \n\t" // lp = row
-// preload  "movl bpp, %%eax              \n\t"
-            "addl %%eax, %%edi            \n\t" // rp = row + bpp
-            "movl _MMXLength, %%ecx       \n\t"
+// preload  "mov  row, %1                 \n\t" // edi/rdi
+// preload  "movl diff, %%edx             \n\t"
+// notused  "mov  %1, " PSI "             \n\t" // lp = row
+// preload  "movl bpp, %%ecx              \n\t"
+            "add  " PCX ", %1             \n\t" // rp = row + bpp
+// preload  "movl MMXLength, %%eax        \n\t"
 
             // prime the pump:  load the first Raw(x-bpp) data set
-            "movq -8(%%edi,%%edx,), %%mm7 \n\t"
-            "andl $0x0000003f, %%ecx      \n\t" // calc bytes over mult of 64
+            "movq -8(%1," PDX ",), %%mm7  \n\t"
+            "movl %%eax, %%esi            \n\t" // copy of MMXLength -> esi
+            "andl $0x0000003f, %%esi      \n\t" // calc bytes over mult of 64
 
          "sub_8lp:                        \n\t"
-            "movq (%%edi,%%edx,), %%mm0   \n\t" // load Sub(x) for 1st 8 bytes
+            "movq (%1," PDX ",), %%mm0    \n\t" // load Sub(x) for 1st 8 bytes
             "paddb %%mm7, %%mm0           \n\t"
-            "movq 8(%%edi,%%edx,), %%mm1  \n\t" // load Sub(x) for 2nd 8 bytes
-            "movq %%mm0, (%%edi,%%edx,)   \n\t" // write Raw(x) for 1st 8 bytes
+            "movq 8(%1," PDX ",), %%mm1   \n\t" // load Sub(x) for 2nd 8 bytes
+            "movq %%mm0, (%1," PDX ",)    \n\t" // write Raw(x) for 1st 8 bytes
 
             // Now mm0 will be used as Raw(x-bpp) for the 2nd group of 8 bytes.
             // This will be repeated for each group of 8 bytes with the 8th
@@ -4793,130 +5488,118 @@
             // next loop.
 
             "paddb %%mm0, %%mm1           \n\t"
-            "movq 16(%%edi,%%edx,), %%mm2 \n\t" // load Sub(x) for 3rd 8 bytes
-            "movq %%mm1, 8(%%edi,%%edx,)  \n\t" // write Raw(x) for 2nd 8 bytes
+            "movq 16(%1," PDX ",), %%mm2  \n\t" // load Sub(x) for 3rd 8 bytes
+            "movq %%mm1, 8(%1," PDX ",)   \n\t" // write Raw(x) for 2nd 8 bytes
             "paddb %%mm1, %%mm2           \n\t"
-            "movq 24(%%edi,%%edx,), %%mm3 \n\t" // load Sub(x) for 4th 8 bytes
-            "movq %%mm2, 16(%%edi,%%edx,) \n\t" // write Raw(x) for 3rd 8 bytes
+            "movq 24(%1," PDX ",), %%mm3  \n\t" // load Sub(x) for 4th 8 bytes
+            "movq %%mm2, 16(%1," PDX ",)  \n\t" // write Raw(x) for 3rd 8 bytes
             "paddb %%mm2, %%mm3           \n\t"
-            "movq 32(%%edi,%%edx,), %%mm4 \n\t" // load Sub(x) for 5th 8 bytes
-            "movq %%mm3, 24(%%edi,%%edx,) \n\t" // write Raw(x) for 4th 8 bytes
+            "movq 32(%1," PDX ",), %%mm4  \n\t" // load Sub(x) for 5th 8 bytes
+            "movq %%mm3, 24(%1," PDX ",)  \n\t" // write Raw(x) for 4th 8 bytes
             "paddb %%mm3, %%mm4           \n\t"
-            "movq 40(%%edi,%%edx,), %%mm5 \n\t" // load Sub(x) for 6th 8 bytes
-            "movq %%mm4, 32(%%edi,%%edx,) \n\t" // write Raw(x) for 5th 8 bytes
+            "movq 40(%1," PDX ",), %%mm5  \n\t" // load Sub(x) for 6th 8 bytes
+            "movq %%mm4, 32(%1," PDX ",)  \n\t" // write Raw(x) for 5th 8 bytes
             "paddb %%mm4, %%mm5           \n\t"
-            "movq 48(%%edi,%%edx,), %%mm6 \n\t" // load Sub(x) for 7th 8 bytes
-            "movq %%mm5, 40(%%edi,%%edx,) \n\t" // write Raw(x) for 6th 8 bytes
+            "movq 48(%1," PDX ",), %%mm6  \n\t" // load Sub(x) for 7th 8 bytes
+            "movq %%mm5, 40(%1," PDX ",)  \n\t" // write Raw(x) for 6th 8 bytes
             "paddb %%mm5, %%mm6           \n\t"
-            "movq 56(%%edi,%%edx,), %%mm7 \n\t" // load Sub(x) for 8th 8 bytes
-            "movq %%mm6, 48(%%edi,%%edx,) \n\t" // write Raw(x) for 7th 8 bytes
+            "movq 56(%1," PDX ",), %%mm7  \n\t" // load Sub(x) for 8th 8 bytes
+            "movq %%mm6, 48(%1," PDX ",)  \n\t" // write Raw(x) for 7th 8 bytes
             "addl $64, %%edx              \n\t"
             "paddb %%mm6, %%mm7           \n\t"
-            "cmpl %%ecx, %%edx            \n\t"
-            "movq %%mm7, -8(%%edi,%%edx,) \n\t" // write Raw(x) for 8th 8 bytes
+            "cmpl %%esi, %%edx            \n\t" // cmp to bytes over mult of 64
+            "movq %%mm7, -8(%1," PDX ",)  \n\t" // write Raw(x) for 8th 8 bytes
             "jb sub_8lp                   \n\t"
 
-            "cmpl _MMXLength, %%edx       \n\t"
+            "cmpl %%eax, %%edx            \n\t" // compare to MMXLength
             "jnb sub_8lt8                 \n\t"
 
          "sub_8lpA:                       \n\t"
-            "movq (%%edi,%%edx,), %%mm0   \n\t"
+            "movq (%1," PDX ",), %%mm0    \n\t"
             "addl $8, %%edx               \n\t"
             "paddb %%mm7, %%mm0           \n\t"
-            "cmpl _MMXLength, %%edx       \n\t"
-            "movq %%mm0, -8(%%edi,%%edx,) \n\t" // -8 to offset early addl edx
+            "cmpl %%eax, %%edx            \n\t" // compare to MMXLength
+            "movq %%mm0, -8(%1," PDX ",)  \n\t" // -8 to offset early addl edx
             "movq %%mm0, %%mm7            \n\t" // move calculated Raw(x) data
-                                                //  to mm1 to be new Raw(x-bpp)
+            "jb sub_8lpA                  \n\t" //  to mm7 to be new Raw(x-bpp)
                                                 //  for next loop
-            "jb sub_8lpA                  \n\t"
-
          "sub_8lt8:                       \n\t"
 
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
+            : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+              "=D" (dummy_value_D),   // 1
+              "=d" (dummy_value_d),   // 2
+              "=a" (dummy_value_a)    // 3
 
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
+            : "0" (bpp),              // ecx    // input regs
+              "1" (row),              // edi
+              "2" (diff),             // edx
+              "3" (MMXLength)         // eax
 
-            : "%ecx", "%edx", "%esi"            // clobber list
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
+            : "%esi"                            // clobber list
+#if defined(CLOBBER_MMX_REGS_SUPPORTED)
+            , "%mm0", "%mm1", "%mm2", "%mm3"
+            , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
          );
       }
-      break;
+      break;  // end 8 bpp
 
-      default:                // bpp greater than 8 bytes   GRR BOGUS
+      default:                // bpp != 1,2,3,4,6,8:  doesn't exist
       {
-         __asm__ __volatile__ (
-            "movl _dif, %%edx             \n\t"
-// preload  "movl row, %%edi              \n\t"
-            "movl %%edi, %%esi            \n\t" // lp = row
-// preload  "movl bpp, %%eax              \n\t"
-            "addl %%eax, %%edi            \n\t" // rp = row + bpp
-
-         "sub_Alp:                        \n\t"
-            "movq (%%edi,%%edx,), %%mm0   \n\t"
-            "movq (%%esi,%%edx,), %%mm1   \n\t"
-            "addl $8, %%edx               \n\t"
-            "paddb %%mm1, %%mm0           \n\t"
-            "cmpl _MMXLength, %%edx       \n\t"
-            "movq %%mm0, -8(%%edi,%%edx,) \n\t" // mov does not affect flags;
-                                                //  -8 to offset addl edx
-            "jb sub_Alp                   \n\t"
-
-            : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-              "=D" (dummy_value_D)    // 1
-
-            : "0" (bpp),              // eax    // input regs
-              "1" (row)               // edi
-
-            : "%edx", "%esi"                    // clobber list
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
-            , "%mm0", "%mm1"
+         // ERROR:  SHOULD NEVER BE REACHED
+#if defined(PNG_DEBUG)
+         png_debug(1, "Internal libpng logic error (GCC "
+           "png_read_filter_row_mmx_sub())\n");
 #endif
-         );
       }
       break;
 
    } // end switch (bpp)
 
    __asm__ __volatile__ (
-      "movl _MMXLength, %%edx       \n\t"
-//pre "movl row, %%edi              \n\t"
-      "cmpl _FullLength, %%edx      \n\t"
-      "jnb sub_end                  \n\t"
+//pre "movl MMXLength, %%eax         \n\t"
+//pre "mov  row, %1                  \n\t" // edi/rdi
+//pre "cmpl FullLength, %%eax        \n\t"
+      "cmpl %%edx, %%eax             \n\t"
+      "jnb sub_end                   \n\t"
 
-      "movl %%edi, %%esi            \n\t" // lp = row
-//pre "movl bpp, %%eax              \n\t"
-      "addl %%eax, %%edi            \n\t" // rp = row + bpp
-      "xorl %%eax, %%eax            \n\t"
+      "mov  %1, " PSI "              \n\t" // lp = row
+//pre "movl bpp, %%ecx               \n\t"
+      "add  " PCX ", %1              \n\t" // rp = row + bpp
+      "xorl %%ecx, %%ecx             \n\t"
 
-   "sub_lp2:                        \n\t"
-      "movb (%%esi,%%edx,), %%al    \n\t"
-      "addb %%al, (%%edi,%%edx,)    \n\t"
-      "incl %%edx                   \n\t"
-      "cmpl _FullLength, %%edx      \n\t"
-      "jb sub_lp2                   \n\t"
+   "sub_lp2:                         \n\t"
+      "movb (" PSI "," PAX ",), %%cl \n\t"
+      "addb %%cl, (%1," PAX ",)      \n\t"
+      "incl %%eax                    \n\t"
+      "cmpl %%edx, %%eax             \n\t" // FullLength
+      "jb sub_lp2                    \n\t"
 
-   "sub_end:                        \n\t"
-      "EMMS                         \n\t" // end MMX instructions
+   "sub_end:                         \n\t"
+      "EMMS                          \n\t" // end MMX instructions
 
-      : "=a" (dummy_value_a),   // 0      // output regs (dummy)
-        "=D" (dummy_value_D)    // 1
+      : "=c" (dummy_value_c),   // 0      // output regs (dummy)
+        "=D" (dummy_value_D),   // 1
+        "=a" (dummy_value_a),   // 2
+        "=d" (dummy_value_d)    // 3
 
-      : "0" (bpp),              // eax    // input regs
-        "1" (row)               // edi
+      : "0" (bpp),              // ecx    // input regs
+        "1" (row),              // edi
+        "2" (MMXLength),        // eax
+        "3" (FullLength)        // edx
 
-      : "%edx", "%esi"                    // clobber list
+      : "%esi"                            // clobber list
    );
 
 } // end of png_read_filter_row_mmx_sub()
-#endif
+
+#endif /* PNG_MMX_READ_FILTER_SUB_SUPPORTED */
 
 
 
 
+#if defined(PNG_MMX_READ_FILTER_UP_SUPPORTED)
+
 //===========================================================================//
 //                                                                           //
 //            P N G _ R E A D _ F I L T E R _ R O W _ M M X _ U P            //
@@ -4929,136 +5612,133 @@
 png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
                            png_bytep prev_row)
 {
-   png_uint_32 len;
+   unsigned len;        // png_uint_32 is actually 64-bit on x86-64
    int dummy_value_d;   // fix 'forbidden register 3 (dx) was spilled' error
-   int dummy_value_S;
-   int dummy_value_D;
+   png_bytep dummy_value_S;
+   png_bytep dummy_value_D;
 
    len = row_info->rowbytes;              // number of bytes to filter
 
    __asm__ __volatile__ (
-//pre "movl row, %%edi              \n\t"
-      // get # of bytes to alignment
-#ifdef __PIC__
-      "pushl %%ebx                  \n\t"
-#endif
-      "movl %%edi, %%ecx            \n\t"
-      "xorl %%ebx, %%ebx            \n\t"
-      "addl $0x7, %%ecx             \n\t"
-      "xorl %%eax, %%eax            \n\t"
-      "andl $0xfffffff8, %%ecx      \n\t"
-//pre "movl prev_row, %%esi         \n\t"
-      "subl %%edi, %%ecx            \n\t"
-      "jz up_go                     \n\t"
+      SAVE_GOT_ebx
+//pre "mov  prev_row, %1           \n\t" // esi/rsi
+//pre "movl row, %2                \n\t" // edi/rdi
 
-   "up_lp1:                         \n\t" // fix alignment
-      "movb (%%edi,%%ebx,), %%al    \n\t"
-      "addb (%%esi,%%ebx,), %%al    \n\t"
-      "incl %%ebx                   \n\t"
-      "cmpl %%ecx, %%ebx            \n\t"
-      "movb %%al, -1(%%edi,%%ebx,)  \n\t" // mov does not affect flags; -1 to
-      "jb up_lp1                    \n\t" //  offset incl ebx
+      "xorl %%ebx, %%ebx           \n\t"
+      "xorl %%eax, %%eax           \n\t"
 
-   "up_go:                          \n\t"
-//pre "movl len, %%edx              \n\t"
-      "movl %%edx, %%ecx            \n\t"
-      "subl %%ebx, %%edx            \n\t" // subtract alignment fix
-      "andl $0x0000003f, %%edx      \n\t" // calc bytes over mult of 64
-      "subl %%edx, %%ecx            \n\t" // drop over bytes from length
+      // get # of bytes to alignment (note:  computing _delta_ of two pointers,
+      // so hereafter %%ecx is sufficient even on 64-bit)
+      "mov  %2, " PCX "            \n\t" // take start of row
+      "add  $0x7, " PCX "          \n\t" // add 7 to incr past alignment bdry
+//    "andl $0xfffffff8, %%ecx     \n\t" // mask to alignment boundary (32-bit!)
+      CLEAR_BOTTOM_3_BITS  PCX    "\n\t" // mask to alignment boundary
+      "sub  %2, " PCX "            \n\t" // subtract row ptr again => ecx =
+      "jz up_go                    \n\t" //  target value of ebx at alignment
+
+   "up_lp1:                        \n\t" // fix alignment
+      "movb (%2," PBX ",), %%al    \n\t"
+      "addb (%1," PBX ",), %%al    \n\t"
+      "incl %%ebx                  \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movb %%al, -1(%2," PBX ",)  \n\t" // mov does not affect flags; -1 to
+      "jb up_lp1                   \n\t" //  offset incl ebx
+
+   "up_go:                         \n\t"
+//pre "movl len, %%edx             \n\t"
+      "movl %%edx, %%ecx           \n\t"
+      "subl %%ebx, %%edx           \n\t" // subtract alignment fix
+      "andl $0x0000003f, %%edx     \n\t" // calc bytes over mult of 64
+      "subl %%edx, %%ecx           \n\t" // sub over-bytes from original length
 
       // unrolled loop - use all MMX registers and interleave to reduce
       // number of branch instructions (loops) and reduce partial stalls
-   "up_loop:                        \n\t"
-      "movq (%%esi,%%ebx,), %%mm1   \n\t"
-      "movq (%%edi,%%ebx,), %%mm0   \n\t"
-      "movq 8(%%esi,%%ebx,), %%mm3  \n\t"
-      "paddb %%mm1, %%mm0           \n\t"
-      "movq 8(%%edi,%%ebx,), %%mm2  \n\t"
-      "movq %%mm0, (%%edi,%%ebx,)   \n\t"
-      "paddb %%mm3, %%mm2           \n\t"
-      "movq 16(%%esi,%%ebx,), %%mm5 \n\t"
-      "movq %%mm2, 8(%%edi,%%ebx,)  \n\t"
-      "movq 16(%%edi,%%ebx,), %%mm4 \n\t"
-      "movq 24(%%esi,%%ebx,), %%mm7 \n\t"
-      "paddb %%mm5, %%mm4           \n\t"
-      "movq 24(%%edi,%%ebx,), %%mm6 \n\t"
-      "movq %%mm4, 16(%%edi,%%ebx,) \n\t"
-      "paddb %%mm7, %%mm6           \n\t"
-      "movq 32(%%esi,%%ebx,), %%mm1 \n\t"
-      "movq %%mm6, 24(%%edi,%%ebx,) \n\t"
-      "movq 32(%%edi,%%ebx,), %%mm0 \n\t"
-      "movq 40(%%esi,%%ebx,), %%mm3 \n\t"
-      "paddb %%mm1, %%mm0           \n\t"
-      "movq 40(%%edi,%%ebx,), %%mm2 \n\t"
-      "movq %%mm0, 32(%%edi,%%ebx,) \n\t"
-      "paddb %%mm3, %%mm2           \n\t"
-      "movq 48(%%esi,%%ebx,), %%mm5 \n\t"
-      "movq %%mm2, 40(%%edi,%%ebx,) \n\t"
-      "movq 48(%%edi,%%ebx,), %%mm4 \n\t"
-      "movq 56(%%esi,%%ebx,), %%mm7 \n\t"
-      "paddb %%mm5, %%mm4           \n\t"
-      "movq 56(%%edi,%%ebx,), %%mm6 \n\t"
-      "movq %%mm4, 48(%%edi,%%ebx,) \n\t"
-      "addl $64, %%ebx              \n\t"
-      "paddb %%mm7, %%mm6           \n\t"
-      "cmpl %%ecx, %%ebx            \n\t"
-      "movq %%mm6, -8(%%edi,%%ebx,) \n\t" // (+56)movq does not affect flags;
-      "jb up_loop                   \n\t" //  -8 to offset addl ebx
+   "up_loop:                       \n\t"
+      "movq (%1," PBX ",), %%mm1   \n\t"
+      "movq (%2," PBX ",), %%mm0   \n\t"
+      "movq 8(%1," PBX ",), %%mm3  \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "movq 8(%2," PBX ",), %%mm2  \n\t"
+      "movq %%mm0, (%2," PBX ",)   \n\t"
+      "paddb %%mm3, %%mm2          \n\t"
+      "movq 16(%1," PBX ",), %%mm5 \n\t"
+      "movq %%mm2, 8(%2," PBX ",)  \n\t"
+      "movq 16(%2," PBX ",), %%mm4 \n\t"
+      "movq 24(%1," PBX ",), %%mm7 \n\t"
+      "paddb %%mm5, %%mm4          \n\t"
+      "movq 24(%2," PBX ",), %%mm6 \n\t"
+      "movq %%mm4, 16(%2," PBX ",) \n\t"
+      "paddb %%mm7, %%mm6          \n\t"
+      "movq 32(%1," PBX ",), %%mm1 \n\t"
+      "movq %%mm6, 24(%2," PBX ",) \n\t"
+      "movq 32(%2," PBX ",), %%mm0 \n\t"
+      "movq 40(%1," PBX ",), %%mm3 \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "movq 40(%2," PBX ",), %%mm2 \n\t"
+      "movq %%mm0, 32(%2," PBX ",) \n\t"
+      "paddb %%mm3, %%mm2          \n\t"
+      "movq 48(%1," PBX ",), %%mm5 \n\t"
+      "movq %%mm2, 40(%2," PBX ",) \n\t"
+      "movq 48(%2," PBX ",), %%mm4 \n\t"
+      "movq 56(%1," PBX ",), %%mm7 \n\t"
+      "paddb %%mm5, %%mm4          \n\t"
+      "movq 56(%2," PBX ",), %%mm6 \n\t"
+      "movq %%mm4, 48(%2," PBX ",) \n\t"
+      "addl $64, %%ebx             \n\t"
+      "paddb %%mm7, %%mm6          \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movq %%mm6, -8(%2," PBX ",) \n\t" // (+56)movq does not affect flags;
+      "jb up_loop                  \n\t" //  -8 to offset addl ebx
 
-      "cmpl $0, %%edx               \n\t" // test for bytes over mult of 64
-      "jz up_end                    \n\t"
+      "cmpl $0, %%edx              \n\t" // test for bytes over mult of 64
+      "jz up_end                   \n\t"
 
-      "cmpl $8, %%edx               \n\t" // test for less than 8 bytes
-      "jb up_lt8                    \n\t" //  [added by lcreeve at netins.net]
+      "cmpl $8, %%edx              \n\t" // test for less than 8 bytes
+      "jb up_lt8                   \n\t" //  [added by lcreeve at netins.net]
 
-      "addl %%edx, %%ecx            \n\t"
-      "andl $0x00000007, %%edx      \n\t" // calc bytes over mult of 8
-      "subl %%edx, %%ecx            \n\t" // drop over bytes from length
-      "jz up_lt8                    \n\t"
+      "addl %%edx, %%ecx           \n\t"
+      "andl $0x00000007, %%edx     \n\t" // calc bytes over mult of 8
+      "subl %%edx, %%ecx           \n\t" // drop over-bytes from length
+      "jz up_lt8                   \n\t"
 
-   "up_lpA:                         \n\t" // use MMX regs to update 8 bytes sim.
-      "movq (%%esi,%%ebx,), %%mm1   \n\t"
-      "movq (%%edi,%%ebx,), %%mm0   \n\t"
-      "addl $8, %%ebx               \n\t"
-      "paddb %%mm1, %%mm0           \n\t"
-      "cmpl %%ecx, %%ebx            \n\t"
-      "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // movq does not affect flags; -8 to
-      "jb up_lpA                    \n\t" //  offset add ebx
-      "cmpl $0, %%edx               \n\t" // test for bytes over mult of 8
-      "jz up_end                    \n\t"
+   "up_lpA:                        \n\t" // use MMX regs to update 8 bytes sim.
+      "movq (%1," PBX ",), %%mm1   \n\t"
+      "movq (%2," PBX ",), %%mm0   \n\t"
+      "addl $8, %%ebx              \n\t"
+      "paddb %%mm1, %%mm0          \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movq %%mm0, -8(%2," PBX ",) \n\t" // movq does not affect flags; -8 to
+      "jb up_lpA                   \n\t" //  offset add ebx
+      "cmpl $0, %%edx              \n\t" // test for bytes over mult of 8
+      "jz up_end                   \n\t"
 
-   "up_lt8:                         \n\t"
-      "xorl %%eax, %%eax            \n\t"
-      "addl %%edx, %%ecx            \n\t" // move over byte count into counter
+   "up_lt8:                        \n\t"
+      "xorl %%eax, %%eax           \n\t"
+      "addl %%edx, %%ecx           \n\t" // move over byte count into counter
 
-   "up_lp2:                         \n\t" // use x86 regs for remaining bytes
-      "movb (%%edi,%%ebx,), %%al    \n\t"
-      "addb (%%esi,%%ebx,), %%al    \n\t"
-      "incl %%ebx                   \n\t"
-      "cmpl %%ecx, %%ebx            \n\t"
-      "movb %%al, -1(%%edi,%%ebx,)  \n\t" // mov does not affect flags; -1 to
-      "jb up_lp2                    \n\t" //  offset inc ebx
+   "up_lp2:                        \n\t" // use x86 regs for remaining bytes
+      "movb (%2," PBX ",), %%al    \n\t"
+      "addb (%1," PBX ",), %%al    \n\t"
+      "incl %%ebx                  \n\t"
+      "cmpl %%ecx, %%ebx           \n\t"
+      "movb %%al, -1(%2," PBX ",)  \n\t" // mov does not affect flags; -1 to
+      "jb up_lp2                   \n\t" //  offset inc ebx
 
-   "up_end:                         \n\t"
-      "EMMS                         \n\t" // conversion of filtered row complete
-#ifdef __PIC__
-      "popl %%ebx                   \n\t"
-#endif
+   "up_end:                        \n\t"
+      "EMMS                        \n\t" // conversion of filtered row complete
+      RESTORE_GOT_ebx
 
-      : "=d" (dummy_value_d),   // 0      // output regs (dummy)
+      : "=d" (dummy_value_d),   // 0     // output regs (dummy)
         "=S" (dummy_value_S),   // 1
         "=D" (dummy_value_D)    // 2
 
-      : "0" (len),              // edx    // input regs
+      : "0" (len),              // edx   // input regs
         "1" (prev_row),         // esi
         "2" (row)               // edi
 
-      : "%eax", "%ecx"            // clobber list (no input regs!)
-#ifndef __PIC__
-      , "%ebx"
-#endif
-
-#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
+      : "%eax", "%ecx"                   // clobber list (no input regs!)
+        _CLOBBER_GOT_ebx
+#if defined(PNG_CLOBBER_MMX_REGS_SUPPORTED)
       , "%mm0", "%mm1", "%mm2", "%mm3"
       , "%mm4", "%mm5", "%mm6", "%mm7"
 #endif
@@ -5066,7 +5746,7 @@
 
 } // end of png_read_filter_row_mmx_up()
 
-#endif /* PNG_MMX_CODE_SUPPORTED */
+#endif /* PNG_MMX_READ_FILTER_UP_SUPPORTED */
 
 
 
@@ -5077,79 +5757,102 @@
 /*                                                                           */
 /*===========================================================================*/
 
-
 /* Optimized png_read_filter_row routines */
 
 void /* PRIVATE */
 png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
    row, png_bytep prev_row, int filter)
 {
-#ifdef PNG_DEBUG
-   char filnm[10];
+#if defined(PNG_DEBUG)
+   char filtname[10];
 #endif
 
-#if defined(PNG_MMX_CODE_SUPPORTED)
-/* GRR:  these are superseded by png_ptr->asm_flags: */
-#define UseMMX_sub    1   // GRR:  converted 20000730
-#define UseMMX_up     1   // GRR:  converted 20000729
-#define UseMMX_avg    1   // GRR:  converted 20000828 (+ 16-bit bugfix 20000916)
-#define UseMMX_paeth  1   // GRR:  converted 20000828
-
    if (_mmx_supported == 2) {
-       /* this should have happened in png_init_mmx_flags() already */
 #if !defined(PNG_1_0_X)
+       /* this should have happened in png_init_mmx_flags() already */
        png_warning(png_ptr, "asm_flags may not have been initialized");
 #endif
        png_mmx_support();
    }
-#endif /* PNG_MMX_CODE_SUPPORTED */
 
-#ifdef PNG_DEBUG
+#if defined(PNG_DEBUG)
    png_debug(1, "in png_read_filter_row (pnggccrd.c)\n");
    switch (filter)
    {
-      case 0: sprintf(filnm, "none");
+      case 0:
+         png_snprintf(filtname, 10, "none");
          break;
-      case 1: sprintf(filnm, "sub-%s",
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+
+      case 1:
+         png_snprintf(filtname, 10, "sub-%s",
+#ifdef PNG_MMX_READ_FILTER_SUB_SUPPORTED
 #if !defined(PNG_1_0_X)
-        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB)? "MMX" :
+           ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB) &&
+            (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
+            (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold)) ? "MMX" :
+#else
+           _mmx_supported ? "MMX" :   /* PNG_1_0_X: no asm_flags test */
 #endif
 #endif
-"x86");
+           "C");
          break;
-      case 2: sprintf(filnm, "up-%s",
-#ifdef PNG_MMX_CODE_SUPPORTED
+
+      case 2:
+         png_snprintf(filtname, 10, "up-%s",
+#ifdef PNG_MMX_READ_FILTER_UP_SUPPORTED
 #if !defined(PNG_1_0_X)
-        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP)? "MMX" :
+           ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP) &&
+            (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
+            (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold)) ? "MMX" :
+#else
+           _mmx_supported ? "MMX" :   /* PNG_1_0_X: no asm_flags test */
 #endif
 #endif
- "x86");
+           "C");
          break;
-      case 3: sprintf(filnm, "avg-%s",
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+
+      case 3:
+         png_snprintf(filtname, 10, "avg-%s",
+#ifdef PNG_MMX_READ_FILTER_AVG_SUPPORTED
 #if !defined(PNG_1_0_X)
-        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG)? "MMX" :
+           ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG) &&
+            (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
+            (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold)) ? "MMX" :
+#else
+           _mmx_supported ? "MMX" :   /* PNG_1_0_X: no asm_flags test */
 #endif
 #endif
- "x86");
+           "C");
          break;
-      case 4: sprintf(filnm, "Paeth-%s",
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+
+      case 4:
+         png_snprintf(filtname, 10, "paeth-%s",
+#ifdef PNG_MMX_READ_FILTER_PAETH_SUPPORTED
+#if defined(PNG_x86_64_USE_GOTPCREL) || defined(PNG_THREAD_UNSAFE_OK)
 #if !defined(PNG_1_0_X)
-        (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH)? "MMX":
+           ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH) &&
+            (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
+            (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
+#else
+           _mmx_supported
 #endif
+           ? "MMX" :
+#endif /* PNG_x86_64_USE_GOTPCREL || PNG_THREAD_UNSAFE_OK */
 #endif
-"x86");
+             "C");
          break;
-      default: sprintf(filnm, "unknw");
+
+      default:
+         png_snprintf(filtname, 10, "unknown");
          break;
    }
-   png_debug2(0, "row_number=%5ld, %5s, ", png_ptr->row_number, filnm);
-   png_debug1(0, "row=0x%08lx, ", (unsigned long)row);
-   png_debug2(0, "pixdepth=%2d, bytes=%d, ", (int)row_info->pixel_depth,
+   png_debug2(2, "row_number=%ld, %s, ", png_ptr->row_number, filtname);
+   //png_debug1(0, "png_ptr=%10p, ", png_ptr);
+   //png_debug1(0, "asm_flags=0x%08lx, ", png_ptr->asm_flags);
+   png_debug1(0, "row=%10p, ", row);
+   png_debug2(0, "pixdepth=%d, bytes=%d, ", (int)row_info->pixel_depth,
       (int)((row_info->pixel_depth + 7) >> 3));
-   png_debug1(0,"rowbytes=%8ld\n", row_info->rowbytes);
+   png_debug1(0, "rowbytes=%ld\n", row_info->rowbytes);
 #endif /* PNG_DEBUG */
 
    switch (filter)
@@ -5158,7 +5861,7 @@
          break;
 
       case PNG_FILTER_VALUE_SUB:
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+#ifdef PNG_MMX_READ_FILTER_SUB_SUPPORTED
 #if !defined(PNG_1_0_X)
          if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB) &&
              (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
@@ -5170,7 +5873,7 @@
             png_read_filter_row_mmx_sub(row_info, row);
          }
          else
-#endif /* PNG_MMX_CODE_SUPPORTED */
+#endif
          {
             png_uint_32 i;
             png_uint_32 istop = row_info->rowbytes;
@@ -5187,7 +5890,7 @@
          break;
 
       case PNG_FILTER_VALUE_UP:
-#if defined(PNG_MMX_CODE_SUPPORTED)
+#ifdef PNG_MMX_READ_FILTER_UP_SUPPORTED
 #if !defined(PNG_1_0_X)
          if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP) &&
              (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
@@ -5199,7 +5902,7 @@
             png_read_filter_row_mmx_up(row_info, row, prev_row);
          }
           else
-#endif /* PNG_MMX_CODE_SUPPORTED */
+#endif
          {
             png_uint_32 i;
             png_uint_32 istop = row_info->rowbytes;
@@ -5215,7 +5918,7 @@
          break;
 
       case PNG_FILTER_VALUE_AVG:
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+#ifdef PNG_MMX_READ_FILTER_AVG_SUPPORTED
 #if !defined(PNG_1_0_X)
          if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG) &&
              (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
@@ -5227,7 +5930,7 @@
             png_read_filter_row_mmx_avg(row_info, row, prev_row);
          }
          else
-#endif /* PNG_MMX_CODE_SUPPORTED */
+#endif
          {
             png_uint_32 i;
             png_bytep rp = row;
@@ -5253,7 +5956,8 @@
          break;
 
       case PNG_FILTER_VALUE_PAETH:
-#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
+#ifdef PNG_MMX_READ_FILTER_PAETH_SUPPORTED
+#if defined(PNG_x86_64_USE_GOTPCREL) || defined(PNG_THREAD_UNSAFE_OK)
 #if !defined(PNG_1_0_X)
          if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH) &&
              (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
@@ -5265,7 +5969,8 @@
             png_read_filter_row_mmx_paeth(row_info, row, prev_row);
          }
          else
-#endif /* PNG_MMX_CODE_SUPPORTED */
+#endif /* PNG_x86_64_USE_GOTPCREL || PNG_THREAD_UNSAFE_OK */
+#endif
          {
             png_uint_32 i;
             png_bytep rp = row;
@@ -5292,7 +5997,7 @@
                p = b - c;
                pc = a - c;
 
-#ifdef PNG_USE_ABS
+#if defined(PNG_USE_ABS)
                pa = abs(p);
                pb = abs(pc);
                pc = abs(p + pc);
@@ -5329,92 +6034,5 @@
 #endif /* PNG_HAVE_MMX_READ_FILTER_ROW */
 
 
-/*===========================================================================*/
-/*                                                                           */
-/*                      P N G _ M M X _ S U P P O R T                        */
-/*                                                                           */
-/*===========================================================================*/
-
-/* GRR NOTES:  (1) the following code assumes 386 or better (pushfl/popfl)
- *             (2) all instructions compile with gcc 2.7.2.3 and later
- *             (3) the function is moved down here to prevent gcc from
- *                  inlining it in multiple places and then barfing be-
- *                  cause the ".NOT_SUPPORTED" label is multiply defined
- *             [is there a way to signal that a *single* function should
- *              not be inlined?  is there a way to modify the label for
- *              each inlined instance, e.g., by appending _1, _2, etc.?
- *              maybe if don't use leading "." in label name? (nope...sigh)]
- */
-
-int PNGAPI
-png_mmx_support(void)
-{
-#if defined(PNG_MMX_CODE_SUPPORTED)
-    int result;
-    __asm__ __volatile__ (
-        "pushl %%ebx          \n\t"  // ebx gets clobbered by CPUID instruction
-        "pushl %%ecx          \n\t"  // so does ecx...
-        "pushl %%edx          \n\t"  // ...and edx (but ecx & edx safe on Linux)
-//      ".byte  0x66          \n\t"  // convert 16-bit pushf to 32-bit pushfd
-//      "pushf                \n\t"  // 16-bit pushf
-        "pushfl               \n\t"  // save Eflag to stack
-        "popl %%eax           \n\t"  // get Eflag from stack into eax
-        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
-        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
-        "pushl %%eax          \n\t"  // save modified Eflag back to stack
-//      ".byte  0x66          \n\t"  // convert 16-bit popf to 32-bit popfd
-//      "popf                 \n\t"  // 16-bit popf
-        "popfl                \n\t"  // restore modified value to Eflag reg
-        "pushfl               \n\t"  // save Eflag to stack
-        "popl %%eax           \n\t"  // get Eflag from stack
-        "pushl %%ecx          \n\t"  // save original Eflag to stack
-        "popfl                \n\t"  // restore original Eflag
-        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
-        "jz 0f                \n\t"  // if same, CPUID instr. is not supported
-
-        "xorl %%eax, %%eax    \n\t"  // set eax to zero
-//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
-        "cpuid                \n\t"  // get the CPU identification info
-        "cmpl $1, %%eax       \n\t"  // make sure eax return non-zero value
-        "jl 0f                \n\t"  // if eax is zero, MMX is not supported
-
-        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
-        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
-                                     // faster than the instruction "mov eax, 1"
-        "cpuid                \n\t"  // get the CPU identification info again
-        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
-        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
-        "jz 0f                \n\t"  // non-zero = yes, MMX IS supported
-
-        "movl $1, %%eax       \n\t"  // set return value to 1
-        "jmp  1f              \n\t"  // DONE:  have MMX support
-
-    "0:                       \n\t"  // .NOT_SUPPORTED: target label for jump instructions
-        "movl $0, %%eax       \n\t"  // set return value to 0
-    "1:                       \n\t"  // .RETURN: target label for jump instructions
-        "popl %%edx           \n\t"  // restore edx
-        "popl %%ecx           \n\t"  // restore ecx
-        "popl %%ebx           \n\t"  // restore ebx
-
-//      "ret                  \n\t"  // DONE:  no MMX support
-                                     // (fall through to standard C "ret")
-
-        : "=a" (result)              // output list
-
-        :                            // any variables used on input (none)
-
-                                     // no clobber list
-//      , "%ebx", "%ecx", "%edx"     // GRR:  we handle these manually
-//      , "memory"   // if write to a variable gcc thought was in a reg
-//      , "cc"       // "condition codes" (flag bits)
-    );
-    _mmx_supported = result;
-#else
-    _mmx_supported = 0;
-#endif /* PNG_MMX_CODE_SUPPORTED */
-
-    return _mmx_supported;
-}
-
-
-#endif /* PNG_USE_PNGGCCRD */
+#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */
+#endif /* __GNUC__ */
diff --git a/pngget.c b/pngget.c
index 036e9af..75d2ca0 100644
--- a/pngget.c
+++ b/pngget.c
@@ -512,8 +512,11 @@
              png_sPLT_tpp spalettes)
 {
    if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL)
+   {
      *spalettes = info_ptr->splt_palettes;
-   return ((png_uint_32)info_ptr->splt_palettes_num);
+     return ((png_uint_32)info_ptr->splt_palettes_num);
+   }
+   return (0);
 }
 #endif
 
@@ -800,8 +803,11 @@
              png_unknown_chunkpp unknowns)
 {
    if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL)
+   {
      *unknowns = info_ptr->unknown_chunks;
-   return ((png_uint_32)info_ptr->unknown_chunks_num);
+     return ((png_uint_32)info_ptr->unknown_chunks_num);
+   }
+   return (0);
 }
 #endif
 
@@ -951,5 +957,6 @@
     return (png_ptr? png_ptr->user_height_max : 0);
 }
 #endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
+ 
 
 #endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/pngpread.c b/pngpread.c
index 1470b0f..2ef6212 100644
--- a/pngpread.c
+++ b/pngpread.c
@@ -1,7 +1,7 @@
 
 /* pngpread.c - read a png file in push mode
  *
- * Last changed in libpng 1.2.17 May 15, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -137,60 +137,60 @@
 png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
-      PNG_IHDR;
-      PNG_IDAT;
-      PNG_IEND;
-      PNG_PLTE;
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
 #if defined(PNG_READ_bKGD_SUPPORTED)
-      PNG_bKGD;
+      PNG_CONST PNG_bKGD;
 #endif
 #if defined(PNG_READ_cHRM_SUPPORTED)
-      PNG_cHRM;
+      PNG_CONST PNG_cHRM;
 #endif
 #if defined(PNG_READ_gAMA_SUPPORTED)
-      PNG_gAMA;
+      PNG_CONST PNG_gAMA;
 #endif
 #if defined(PNG_READ_hIST_SUPPORTED)
-      PNG_hIST;
+      PNG_CONST PNG_hIST;
 #endif
 #if defined(PNG_READ_iCCP_SUPPORTED)
-      PNG_iCCP;
+      PNG_CONST PNG_iCCP;
 #endif
 #if defined(PNG_READ_iTXt_SUPPORTED)
-      PNG_iTXt;
+      PNG_CONST PNG_iTXt;
 #endif
 #if defined(PNG_READ_oFFs_SUPPORTED)
-      PNG_oFFs;
+      PNG_CONST PNG_oFFs;
 #endif
 #if defined(PNG_READ_pCAL_SUPPORTED)
-      PNG_pCAL;
+      PNG_CONST PNG_pCAL;
 #endif
 #if defined(PNG_READ_pHYs_SUPPORTED)
-      PNG_pHYs;
+      PNG_CONST PNG_pHYs;
 #endif
 #if defined(PNG_READ_sBIT_SUPPORTED)
-      PNG_sBIT;
+      PNG_CONST PNG_sBIT;
 #endif
 #if defined(PNG_READ_sCAL_SUPPORTED)
-      PNG_sCAL;
+      PNG_CONST PNG_sCAL;
 #endif
 #if defined(PNG_READ_sRGB_SUPPORTED)
-      PNG_sRGB;
+      PNG_CONST PNG_sRGB;
 #endif
 #if defined(PNG_READ_sPLT_SUPPORTED)
-      PNG_sPLT;
+      PNG_CONST PNG_sPLT;
 #endif
 #if defined(PNG_READ_tEXt_SUPPORTED)
-      PNG_tEXt;
+      PNG_CONST PNG_tEXt;
 #endif
 #if defined(PNG_READ_tIME_SUPPORTED)
-      PNG_tIME;
+      PNG_CONST PNG_tIME;
 #endif
 #if defined(PNG_READ_tRNS_SUPPORTED)
-      PNG_tRNS;
+      PNG_CONST PNG_tRNS;
 #endif
 #if defined(PNG_READ_zTXt_SUPPORTED)
-      PNG_zTXt;
+      PNG_CONST PNG_zTXt;
 #endif
 #endif /* PNG_USE_LOCAL_ARRAYS */
    /* First we make sure we have enough data for the 4 byte chunk name
@@ -660,7 +660,7 @@
 png_push_read_IDAT(png_structp png_ptr)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
-   PNG_IDAT;
+   PNG_CONST PNG_IDAT;
 #endif
    if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
    {
@@ -990,25 +990,25 @@
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
 
    /* start of interlace block */
-   const int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+   PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
 
    /* offset to next interlace block */
-   const int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
 
    /* start of interlace block in the y direction */
-   const int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+   PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
 
    /* offset to next interlace block in the y direction */
-   const int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+   PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
 
    /* Width of interlace block.  This is not currently used - if you need
     * it, uncomment it here and in png.h
-   const int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
+   PNG_CONST int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
    */
 
    /* Height of interlace block.  This is not currently used - if you need
     * it, uncomment it here and in png.h
-   const int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
    */
 #endif
 
@@ -1062,8 +1062,7 @@
    if (!(png_ptr->mode & PNG_HAVE_IHDR) || (png_ptr->mode & PNG_HAVE_IEND))
       {
          png_error(png_ptr, "Out of place tEXt");
-         /* to quiet some compiler warnings */
-         if(info_ptr == NULL) return;
+         info_ptr = info_ptr; /* to quiet some compiler warnings */
       }
 
 #ifdef PNG_MAX_MALLOC_64K
@@ -1159,8 +1158,7 @@
    if (!(png_ptr->mode & PNG_HAVE_IHDR) || (png_ptr->mode & PNG_HAVE_IEND))
       {
          png_error(png_ptr, "Out of place zTXt");
-         /* to quiet some compiler warnings */
-         if(info_ptr == NULL) return;
+         info_ptr = info_ptr; /* to quiet some compiler warnings */
       }
 
 #ifdef PNG_MAX_MALLOC_64K
@@ -1352,8 +1350,7 @@
    if (!(png_ptr->mode & PNG_HAVE_IHDR) || (png_ptr->mode & PNG_HAVE_IEND))
       {
          png_error(png_ptr, "Out of place iTXt");
-         /* to quiet some compiler warnings */
-         if(info_ptr == NULL) return;
+         info_ptr = info_ptr; /* to quiet some compiler warnings */
       }
 
 #ifdef PNG_MAX_MALLOC_64K
@@ -1480,9 +1477,7 @@
 #endif
          png_chunk_error(png_ptr, "unknown critical chunk");
 
-      /* to quiet compiler warnings about unused info_ptr */
-      if (info_ptr == NULL)
-         return;
+      info_ptr = info_ptr; /* to quiet some compiler warnings */
    }
 
 #if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
@@ -1496,8 +1491,9 @@
            length = (png_uint_32)65535L;
        }
 #endif
-       png_strcpy((png_charp)png_ptr->unknown_chunk.name,
-         (png_charp)png_ptr->chunk_name);
+       png_strncpy((png_charp)png_ptr->unknown_chunk.name,
+         (png_charp)png_ptr->chunk_name,
+         png_sizeof(png_ptr->unknown_chunk.name));
        png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
        png_ptr->unknown_chunk.size = (png_size_t)length;
        png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
@@ -1559,7 +1555,7 @@
    png_bytep old_row, png_bytep new_row)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
-   const int FARDATA png_pass_dsp_mask[7] =
+   PNG_CONST int FARDATA png_pass_dsp_mask[7] =
       {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
 #endif
    if(png_ptr == NULL) return;
diff --git a/pngread.c b/pngread.c
index 56dac8a..562414a 100644
--- a/pngread.c
+++ b/pngread.c
@@ -1,7 +1,7 @@
 
 /* pngread.c - read a PNG file
  *
- * Last changed in libpng 1.2.15 January 5, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -117,11 +117,13 @@
         char msg[80];
         if (user_png_ver)
         {
-          sprintf(msg, "Application was compiled with png.h from libpng-%.20s",
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
              user_png_ver);
           png_warning(png_ptr, msg);
         }
-        sprintf(msg, "Application  is  running with png.c from libpng-%.20s",
+        png_snprintf(msg, 80,
+             "Application  is  running with png.c from libpng-%.20s",
            png_libpng_ver);
         png_warning(png_ptr, msg);
 #endif
@@ -197,11 +199,13 @@
       png_ptr->warning_fn=NULL;
       if (user_png_ver)
       {
-        sprintf(msg, "Application was compiled with png.h from libpng-%.20s",
+        png_snprintf(msg, 80,
+           "Application was compiled with png.h from libpng-%.20s",
            user_png_ver);
         png_warning(png_ptr, msg);
       }
-      sprintf(msg, "Application  is  running with png.c from libpng-%.20s",
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
          png_libpng_ver);
       png_warning(png_ptr, msg);
    }
@@ -346,60 +350,60 @@
    for(;;)
    {
 #ifdef PNG_USE_LOCAL_ARRAYS
-      PNG_IHDR;
-      PNG_IDAT;
-      PNG_IEND;
-      PNG_PLTE;
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
 #if defined(PNG_READ_bKGD_SUPPORTED)
-      PNG_bKGD;
+      PNG_CONST PNG_bKGD;
 #endif
 #if defined(PNG_READ_cHRM_SUPPORTED)
-      PNG_cHRM;
+      PNG_CONST PNG_cHRM;
 #endif
 #if defined(PNG_READ_gAMA_SUPPORTED)
-      PNG_gAMA;
+      PNG_CONST PNG_gAMA;
 #endif
 #if defined(PNG_READ_hIST_SUPPORTED)
-      PNG_hIST;
+      PNG_CONST PNG_hIST;
 #endif
 #if defined(PNG_READ_iCCP_SUPPORTED)
-      PNG_iCCP;
+      PNG_CONST PNG_iCCP;
 #endif
 #if defined(PNG_READ_iTXt_SUPPORTED)
-      PNG_iTXt;
+      PNG_CONST PNG_iTXt;
 #endif
 #if defined(PNG_READ_oFFs_SUPPORTED)
-      PNG_oFFs;
+      PNG_CONST PNG_oFFs;
 #endif
 #if defined(PNG_READ_pCAL_SUPPORTED)
-      PNG_pCAL;
+      PNG_CONST PNG_pCAL;
 #endif
 #if defined(PNG_READ_pHYs_SUPPORTED)
-      PNG_pHYs;
+      PNG_CONST PNG_pHYs;
 #endif
 #if defined(PNG_READ_sBIT_SUPPORTED)
-      PNG_sBIT;
+      PNG_CONST PNG_sBIT;
 #endif
 #if defined(PNG_READ_sCAL_SUPPORTED)
-      PNG_sCAL;
+      PNG_CONST PNG_sCAL;
 #endif
 #if defined(PNG_READ_sPLT_SUPPORTED)
-      PNG_sPLT;
+      PNG_CONST PNG_sPLT;
 #endif
 #if defined(PNG_READ_sRGB_SUPPORTED)
-      PNG_sRGB;
+      PNG_CONST PNG_sRGB;
 #endif
 #if defined(PNG_READ_tEXt_SUPPORTED)
-      PNG_tEXt;
+      PNG_CONST PNG_tEXt;
 #endif
 #if defined(PNG_READ_tIME_SUPPORTED)
-      PNG_tIME;
+      PNG_CONST PNG_tIME;
 #endif
 #if defined(PNG_READ_tRNS_SUPPORTED)
-      PNG_tRNS;
+      PNG_CONST PNG_tRNS;
 #endif
 #if defined(PNG_READ_zTXt_SUPPORTED)
-      PNG_zTXt;
+      PNG_CONST PNG_zTXt;
 #endif
 #endif /* PNG_USE_LOCAL_ARRAYS */
       png_byte chunk_length[4];
@@ -567,9 +571,10 @@
 png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
-   PNG_IDAT;
-   const int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
-   const int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+   PNG_CONST PNG_IDAT;
+   PNG_CONST int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
+     0xff};
+   PNG_CONST int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
 #endif
    int ret;
    if(png_ptr == NULL) return;
@@ -925,60 +930,60 @@
    do
    {
 #ifdef PNG_USE_LOCAL_ARRAYS
-      PNG_IHDR;
-      PNG_IDAT;
-      PNG_IEND;
-      PNG_PLTE;
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
 #if defined(PNG_READ_bKGD_SUPPORTED)
-      PNG_bKGD;
+      PNG_CONST PNG_bKGD;
 #endif
 #if defined(PNG_READ_cHRM_SUPPORTED)
-      PNG_cHRM;
+      PNG_CONST PNG_cHRM;
 #endif
 #if defined(PNG_READ_gAMA_SUPPORTED)
-      PNG_gAMA;
+      PNG_CONST PNG_gAMA;
 #endif
 #if defined(PNG_READ_hIST_SUPPORTED)
-      PNG_hIST;
+      PNG_CONST PNG_hIST;
 #endif
 #if defined(PNG_READ_iCCP_SUPPORTED)
-      PNG_iCCP;
+      PNG_CONST PNG_iCCP;
 #endif
 #if defined(PNG_READ_iTXt_SUPPORTED)
-      PNG_iTXt;
+      PNG_CONST PNG_iTXt;
 #endif
 #if defined(PNG_READ_oFFs_SUPPORTED)
-      PNG_oFFs;
+      PNG_CONST PNG_oFFs;
 #endif
 #if defined(PNG_READ_pCAL_SUPPORTED)
-      PNG_pCAL;
+      PNG_CONST PNG_pCAL;
 #endif
 #if defined(PNG_READ_pHYs_SUPPORTED)
-      PNG_pHYs;
+      PNG_CONST PNG_pHYs;
 #endif
 #if defined(PNG_READ_sBIT_SUPPORTED)
-      PNG_sBIT;
+      PNG_CONST PNG_sBIT;
 #endif
 #if defined(PNG_READ_sCAL_SUPPORTED)
-      PNG_sCAL;
+      PNG_CONST PNG_sCAL;
 #endif
 #if defined(PNG_READ_sPLT_SUPPORTED)
-      PNG_sPLT;
+      PNG_CONST PNG_sPLT;
 #endif
 #if defined(PNG_READ_sRGB_SUPPORTED)
-      PNG_sRGB;
+      PNG_CONST PNG_sRGB;
 #endif
 #if defined(PNG_READ_tEXt_SUPPORTED)
-      PNG_tEXt;
+      PNG_CONST PNG_tEXt;
 #endif
 #if defined(PNG_READ_tIME_SUPPORTED)
-      PNG_tIME;
+      PNG_CONST PNG_tIME;
 #endif
 #if defined(PNG_READ_tRNS_SUPPORTED)
-      PNG_tRNS;
+      PNG_CONST PNG_tRNS;
 #endif
 #if defined(PNG_READ_zTXt_SUPPORTED)
-      PNG_zTXt;
+      PNG_CONST PNG_zTXt;
 #endif
 #endif /* PNG_USE_LOCAL_ARRAYS */
 
@@ -1464,8 +1469,8 @@
    /* read rest of file, and get additional chunks in info_ptr - REQUIRED */
    png_read_end(png_ptr, info_ptr);
 
-   if(transforms == 0 || params == NULL)
-      /* quiet compiler warnings */ return;
+   transforms = transforms; /* quiet compiler warnings */
+   params = params;
 
 }
 #endif /* PNG_INFO_IMAGE_SUPPORTED */
diff --git a/pngrtran.c b/pngrtran.c
index 6ef0a1c..44049ec 100644
--- a/pngrtran.c
+++ b/pngrtran.c
@@ -1,7 +1,7 @@
 
 /* pngrtran.c - transforms the data in a row for PNG readers
  *
- * Last changed in libpng 1.2.15 January 5, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -1244,20 +1244,19 @@
 png_do_read_transformations(png_structp png_ptr)
 {
    png_debug(1, "in png_do_read_transformations\n");
-#if !defined(PNG_USELESS_TESTS_SUPPORTED)
    if (png_ptr->row_buf == NULL)
    {
 #if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
       char msg[50];
 
-      sprintf(msg, "NULL row buffer for row %ld, pass %d", png_ptr->row_number,
+      png_snprintf2(msg, 50,
+         "NULL row buffer for row %ld, pass %d", png_ptr->row_number,
          png_ptr->pass);
       png_error(png_ptr, msg);
 #else
       png_error(png_ptr, "NULL row buffer");
 #endif
    }
-#endif
 
 #if defined(PNG_READ_EXPAND_SUPPORTED)
    if (png_ptr->transformations & PNG_EXPAND)
@@ -1269,7 +1268,8 @@
       }
       else
       {
-         if (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND_tRNS))
+         if (png_ptr->num_trans &&
+             (png_ptr->transformations & PNG_EXPAND_tRNS))
             png_do_expand(&(png_ptr->row_info), png_ptr->row_buf + 1,
                &(png_ptr->trans_values));
          else
@@ -1293,9 +1293,11 @@
       if(rgb_error)
       {
          png_ptr->rgb_to_gray_status=1;
-         if(png_ptr->transformations & PNG_RGB_TO_GRAY_WARN)
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) == 
+             PNG_RGB_TO_GRAY_WARN)
             png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
-         if(png_ptr->transformations & PNG_RGB_TO_GRAY_ERR)
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
+             PNG_RGB_TO_GRAY_ERR)
             png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
       }
    }
@@ -3699,7 +3701,7 @@
             {
                case 1:
                {
-                  gray = (png_uint_16)(gray*0xff);
+                  gray = (png_uint_16)((gray&0x01)*0xff);
                   sp = row + (png_size_t)((row_width - 1) >> 3);
                   dp = row + (png_size_t)row_width - 1;
                   shift = 7 - (int)((row_width + 7) & 0x07);
@@ -3723,7 +3725,7 @@
                }
                case 2:
                {
-                  gray = (png_uint_16)(gray*0x55);
+                  gray = (png_uint_16)((gray&0x03)*0x55);
                   sp = row + (png_size_t)((row_width - 1) >> 2);
                   dp = row + (png_size_t)row_width - 1;
                   shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
@@ -3746,7 +3748,7 @@
                }
                case 4:
                {
-                  gray = (png_uint_16)(gray*0x11);
+                  gray = (png_uint_16)((gray&0x0f)*0x11);
                   sp = row + (png_size_t)((row_width - 1) >> 1);
                   dp = row + (png_size_t)row_width - 1;
                   shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
@@ -3776,6 +3778,7 @@
          {
             if (row_info->bit_depth == 8)
             {
+               gray = gray & 0xff;
                sp = row + (png_size_t)row_width - 1;
                dp = row + (png_size_t)(row_width << 1) - 1;
                for (i = 0; i < row_width; i++)
@@ -3789,12 +3792,13 @@
             }
             else if (row_info->bit_depth == 16)
             {
+               png_byte gray_high = (gray >> 8) & 0xff;
+               png_byte gray_low = gray & 0xff;
                sp = row + row_info->rowbytes - 1;
                dp = row + (row_info->rowbytes << 1) - 1;
                for (i = 0; i < row_width; i++)
                {
-                  if (((png_uint_16)*(sp) |
-                     ((png_uint_16)*(sp - 1) << 8)) == gray)
+                  if (*(sp-1) == gray_high && *(sp) == gray_low) 
                   {
                      *dp-- = 0;
                      *dp-- = 0;
@@ -3819,13 +3823,14 @@
       {
          if (row_info->bit_depth == 8)
          {
+            png_byte red = trans_value->red & 0xff;
+            png_byte green = trans_value->green & 0xff;
+            png_byte blue = trans_value->blue & 0xff;
             sp = row + (png_size_t)row_info->rowbytes - 1;
             dp = row + (png_size_t)(row_width << 2) - 1;
             for (i = 0; i < row_width; i++)
             {
-               if (*(sp - 2) == trans_value->red &&
-                  *(sp - 1) == trans_value->green &&
-                  *(sp - 0) == trans_value->blue)
+               if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)
                   *dp-- = 0;
                else
                   *dp-- = 0xff;
@@ -3836,16 +3841,22 @@
          }
          else if (row_info->bit_depth == 16)
          {
+            png_byte red_high = (trans_value->red >> 8) & 0xff;
+            png_byte green_high = (trans_value->green >> 8) & 0xff;
+            png_byte blue_high = (trans_value->blue >> 8) & 0xff;
+            png_byte red_low = trans_value->red & 0xff;
+            png_byte green_low = trans_value->green & 0xff;
+            png_byte blue_low = trans_value->blue & 0xff;
             sp = row + row_info->rowbytes - 1;
             dp = row + (png_size_t)(row_width << 3) - 1;
             for (i = 0; i < row_width; i++)
             {
-               if ((((png_uint_16)*(sp - 4) |
-                  ((png_uint_16)*(sp - 5) << 8)) == trans_value->red) &&
-                  (((png_uint_16)*(sp - 2) |
-                  ((png_uint_16)*(sp - 3) << 8)) == trans_value->green) &&
-                  (((png_uint_16)*(sp - 0) |
-                  ((png_uint_16)*(sp - 1) << 8)) == trans_value->blue))
+               if (*(sp - 5) == red_high &&
+                  *(sp - 4) == red_low &&
+                  *(sp - 3) == green_high &&
+                  *(sp - 2) == green_low &&
+                  *(sp - 1) == blue_high &&
+                  *(sp    ) == blue_low)
                {
                   *dp-- = 0;
                   *dp-- = 0;
@@ -3965,7 +3976,7 @@
 
 #ifdef PNG_FLOATING_POINT_SUPPORTED
 #if defined(PNG_READ_GAMMA_SUPPORTED)
-const static int png_gamma_shift[] =
+static PNG_CONST int png_gamma_shift[] =
    {0x10, 0x21, 0x42, 0x84, 0x110, 0x248, 0x550, 0xff0, 0x00};
 
 /* We build the 8- or 16-bit gamma tables here.  Note that for 16-bit
diff --git a/pngrutil.c b/pngrutil.c
index 2abca7c..9e96632 100644
--- a/pngrutil.c
+++ b/pngrutil.c
@@ -1,7 +1,7 @@
 
 /* pngrutil.c - utilities to read a PNG file
  *
- * Last changed in libpng 1.2.17 May 15, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -19,7 +19,7 @@
 #ifdef PNG_FLOATING_POINT_SUPPORTED
 #  if defined(_WIN32_WCE)
 /* strtod() function is not supported on WindowsCE */
-__inline double png_strtod(png_structp png_ptr, const char *nptr, char **endptr)
+__inline double png_strtod(png_structp png_ptr, PNG_CONST char *nptr, char **endptr)
 {
    double result = 0;
    int len;
@@ -33,7 +33,7 @@
       result = wcstod(str, &end);
       len = WideCharToMultiByte(CP_ACP, 0, end, -1, NULL, 0, NULL, NULL);
       *endptr = (char *)nptr + (png_strlen(nptr) - len + 1);
-      png_free(str);
+      png_free(png_ptr, str);
    }
    return result;
 }
@@ -181,7 +181,7 @@
                               png_charp chunkdata, png_size_t chunklength,
                               png_size_t prefix_size, png_size_t *newlength)
 {
-   const static char msg[] = "Error decoding compressed text";
+   static PNG_CONST char msg[] = "Error decoding compressed text";
    png_charp text;
    png_size_t text_size;
 
@@ -282,13 +282,16 @@
          char umsg[52];
 
          if (ret == Z_BUF_ERROR)
-            sprintf(umsg,"Buffer error in compressed datastream in %s chunk",
+            png_snprintf(umsg, 52,
+                "Buffer error in compressed datastream in %s chunk",
                 png_ptr->chunk_name);
          else if (ret == Z_DATA_ERROR)
-            sprintf(umsg,"Data error in compressed datastream in %s chunk",
+            png_snprintf(umsg, 52,
+                "Data error in compressed datastream in %s chunk",
                 png_ptr->chunk_name);
          else
-            sprintf(umsg,"Incomplete compressed datastream in %s chunk",
+            png_snprintf(umsg, 52,
+                "Incomplete compressed datastream in %s chunk",
                 png_ptr->chunk_name);
          png_warning(png_ptr, umsg);
 #else
@@ -321,7 +324,8 @@
 #if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
       char umsg[50];
 
-      sprintf(umsg, "Unknown zTXt compression type %d", comp_type);
+      png_snprintf(umsg, 50,
+         "Unknown zTXt compression type %d", comp_type);
       png_warning(png_ptr, umsg);
 #else
       png_warning(png_ptr, "Unknown zTXt compression type");
@@ -563,8 +567,7 @@
    }
    png_crc_finish(png_ptr, length);
 
-   if (&info_ptr == NULL) /* quiet compiler warnings about unused info_ptr */
-      return;
+   info_ptr =info_ptr; /* quiet compiler warnings about unused info_ptr */
 }
 
 #if defined(PNG_READ_gAMA_SUPPORTED)
@@ -1037,7 +1040,7 @@
 
    /* there should be at least one zero (the compression type byte)
       following the separator, and we should be on it  */
-   if ( profile >= chunkdata + slength)
+   if ( profile >= chunkdata + slength - 1)
    {
       png_free(png_ptr, chunkdata);
       png_warning(png_ptr, "Malformed iCCP chunk");
@@ -1141,7 +1144,7 @@
    ++entry_start;
 
    /* a sample depth should follow the separator, and we should be on it  */
-   if (entry_start > chunkdata + slength)
+   if (entry_start > chunkdata + slength - 2)
    {
       png_free(png_ptr, chunkdata);
       png_warning(png_ptr, "malformed sPLT chunk");
@@ -1234,9 +1237,15 @@
 png_handle_tRNS(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
 {
    png_byte readbuf[PNG_MAX_PALETTE_LENGTH];
+   int bit_mask;
 
    png_debug(1, "in png_handle_tRNS\n");
 
+   /* For non-indexed color, mask off any bits in the tRNS value that
+    * exceed the bit depth.  Some creators were writing extra bits there.
+    * This is not needed for indexed color. */
+   bit_mask = (1 << png_ptr->bit_depth) - 1;
+
    if (!(png_ptr->mode & PNG_HAVE_IHDR))
       png_error(png_ptr, "Missing IHDR before tRNS");
    else if (png_ptr->mode & PNG_HAVE_IDAT)
@@ -1265,7 +1274,7 @@
 
       png_crc_read(png_ptr, buf, 2);
       png_ptr->num_trans = 1;
-      png_ptr->trans_values.gray = png_get_uint_16(buf);
+      png_ptr->trans_values.gray = png_get_uint_16(buf) & bit_mask;
    }
    else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
    {
@@ -1279,9 +1288,9 @@
       }
       png_crc_read(png_ptr, buf, (png_size_t)length);
       png_ptr->num_trans = 1;
-      png_ptr->trans_values.red = png_get_uint_16(buf);
-      png_ptr->trans_values.green = png_get_uint_16(buf + 2);
-      png_ptr->trans_values.blue = png_get_uint_16(buf + 4);
+      png_ptr->trans_values.red = png_get_uint_16(buf) & bit_mask;
+      png_ptr->trans_values.green = png_get_uint_16(buf + 2) & bit_mask;
+      png_ptr->trans_values.blue = png_get_uint_16(buf + 4) & bit_mask;
    }
    else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
    {
@@ -1981,10 +1990,11 @@
       /* empty loop */ ;
 
    /* zTXt must have some text after the chunkdataword */
-   if (text == chunkdata + slength)
+   if (text >= chunkdata + slength - 1)
    {
-      comp_type = PNG_TEXT_COMPRESSION_NONE;
-      png_warning(png_ptr, "Zero length zTXt chunk");
+      png_warning(png_ptr, "Truncated zTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
    }
    else
    {
@@ -2084,10 +2094,11 @@
       translated keyword (possibly empty), and possibly some text after the
       keyword */
 
-   if (lang >= chunkdata + slength)
+   if (lang >= chunkdata + slength - 3)
    {
-      comp_flag = PNG_TEXT_COMPRESSION_NONE;
-      png_warning(png_ptr, "Zero length iTXt chunk");
+      png_warning(png_ptr, "Truncated iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
    }
    else
    {
@@ -2102,6 +2113,12 @@
    for (text = lang_key; *text; text++)
       /* empty loop */ ;
    text++;        /* skip NUL separator */
+   if (text >= chunkdata + slength)
+   {
+      png_warning(png_ptr, "Malformed iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
 
    prefix_len = text - chunkdata;
 
@@ -2151,7 +2168,7 @@
    if (png_ptr->mode & PNG_HAVE_IDAT)
    {
 #ifdef PNG_USE_LOCAL_ARRAYS
-      PNG_IDAT;
+      PNG_CONST PNG_IDAT;
 #endif
       if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))  /* not an IDAT */
          png_ptr->mode |= PNG_AFTER_IDAT;
@@ -2184,8 +2201,9 @@
            length = (png_uint_32)65535L;
        }
 #endif
-       png_strcpy((png_charp)png_ptr->unknown_chunk.name,
-         (png_charp)png_ptr->chunk_name);
+       png_strncpy((png_charp)png_ptr->unknown_chunk.name,
+         (png_charp)png_ptr->chunk_name,
+         png_sizeof(png_ptr->unknown_chunk.name));
        png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
        png_ptr->unknown_chunk.size = (png_size_t)length;
        png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
@@ -2221,8 +2239,7 @@
    png_crc_finish(png_ptr, skip);
 
 #if !defined(PNG_READ_USER_CHUNKS_SUPPORTED)
-   if (&info_ptr == NULL) /* quiet compiler warnings about unused info_ptr */
-      return;
+   info_ptr = info_ptr; /* quiet compiler warnings about unused info_ptr */
 #endif
 }
 
@@ -2255,11 +2272,1050 @@
    a zero indicates the pixel is to be skipped.  This is in addition
    to any alpha or transparency value associated with the pixel.  If
    you want all pixels to be combined, pass 0xff (255) in mask.  */
-#ifndef PNG_HAVE_MMX_COMBINE_ROW
+
+/* Optimized C version of utilities to read a PNG file
+ *
+ * Based on code contributed by Nirav Chhatrapati, Intel Corp., 1998.
+ * Interface to libpng contributed by Gilles Vollant, 1999.
+ * GNU C port by Greg Roelofs, 1999-2001.
+ *
+ */
+
+#if defined(PNG_OPTIMIZED_CODE_SUPPORTED)
+#if !defined(PNG_HAVE_MMX_COMBINE_ROW)
+
+/*===========================================================================*/
+/*                                                                           */
+/*                       P N G _ C O M B I N E _ R O W                       */
+/*                                                                           */
+/*===========================================================================*/
+
+
+#define BPP2  2
+#define BPP3  3 /* bytes per pixel (a.k.a. pixel_bytes) */
+#define BPP4  4
+#define BPP6  6 /* (defined only to help avoid cut-and-paste errors) */
+#define BPP8  8
+
+/* Combines the row recently read in with the previous row.
+   This routine takes care of alpha and transparency if requested.
+   This routine also handles the two methods of progressive display
+   of interlaced images, depending on the mask value.
+   The mask value describes which pixels are to be combined with
+   the row.  The pattern always repeats every 8 pixels, so just 8
+   bits are needed.  A one indicates the pixel is to be combined; a
+   zero indicates the pixel is to be skipped.  This is in addition
+   to any alpha or transparency value associated with the pixel.
+   If you want all pixels to be combined, pass 0xff (255) in mask. */
+
+/* This is the plain C routine; it is compiled only when no MMX version
+   is supplied, i.e. when PNG_HAVE_MMX_COMBINE_ROW is not defined. */
+
 void /* PRIVATE */
 png_combine_row(png_structp png_ptr, png_bytep row, int mask)
 {
-   png_debug(1,"in png_combine_row\n");
+   /* Per-pass interlace tables, indexed by png_ptr->pass in the cases below */
+#if defined(PNG_USE_LOCAL_ARRAYS)
+static PNG_CONST int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+static PNG_CONST int FARDATA png_pass_inc[7]   = {8, 8, 4, 4, 2, 2, 1};
+static PNG_CONST int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
+#endif
+
+   png_debug(1, "in png_combine_row (pngrutil.c OPTIMIZED)\n");
+
+   if (mask == 0xff)
+   {
+      png_debug(2,"mask == 0xff:  doing single png_memcpy()\n");
+      png_memcpy(row, png_ptr->row_buf + 1,
+       (png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,png_ptr->width));
+   }
+   else   /* (png_combine_row() is never called with mask == 0) */
+   {
+      switch (png_ptr->row_info.pixel_depth)
+      {
+         /* most common case:  combining 24-bit RGB */
+         case 24:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            /* copy rep_bytes bytes out of every stride bytes of the row */
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP3 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7; /* reduce to mult of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP3 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+               /* whole 8-pixel groups first, then any partial last group */
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP3;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            } /* end of inner scope (there is no MMX branch in this C code) */
+
+            break;
+         }       /* end 24 bpp */
+
+         case 32:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP4 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7; /* reduce to mult of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP4 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP4;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            }
+
+            break;
+         }       /* end 32 bpp */
+
+         case 8:        /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7; /* reduce to mult of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = len;  /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff /* *BPP1 */ ;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            }
+
+            break;
+         }       /* end 8 bpp */
+
+         case 1:        /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_inc, s_start, s_end;
+            int m;
+            int shift;
+            png_uint_32 i;
+            /* m scans the mask from bit 7 down; shift is the bit in *sp */
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+            else
+#endif
+            {
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  int value;
+                  /* overwrite only this pixel's bit of *dp with *sp's bit */
+                  value = (*sp >> shift) & 0x1;
+                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }       /* end 1 bpp */
+
+         case 2:        /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_start, s_end, s_inc;
+            int m;
+            int shift;
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+            else
+#endif
+            {
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0x3;
+                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }       /* end 2 bpp */
+
+         case 4:        /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_start, s_end, s_inc;
+            int m;
+            int shift;
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+            else
+#endif
+            {
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0xf;
+                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }       /* end 4 bpp */
+
+         case 16:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP2 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP2 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP2 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP2;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            } /* end of inner scope (there is no MMX branch in this C code) */
+
+            break;
+         }       /* end 16 bpp */
+
+
+
+         case 48:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            {
+               register png_uint_32 i;
+               png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
+                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+               register int stride = BPP6 * png_pass_inc[png_ptr->pass];
+                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+               register int rep_bytes = BPP6 * png_pass_width[png_ptr->pass];
+                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+               png_uint_32 len = png_ptr->width &~7; /* reduce to mult of 8 */
+               int diff = (int) (png_ptr->width & 7); /* amount lost */
+               register png_uint_32 final_val = BPP6 * len;   /* GRR bugfix */
+
+               srcptr = png_ptr->row_buf + 1 + initial_val;
+               dstptr = row + initial_val;
+
+               for (i = initial_val; i < final_val; i += stride)
+               {
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+               if (diff)  /* number of leftover pixels:  3 for pngtest */
+               {
+                  final_val += diff*BPP6;
+                  for (; i < final_val; i += stride)
+                  {
+                     if (rep_bytes > (int)(final_val-i))
+                        rep_bytes = (int)(final_val-i);
+                     png_memcpy(dstptr, srcptr, rep_bytes);
+                     srcptr += stride;
+                     dstptr += stride;
+                  }
+               }
+            }
+            break;
+         }       /* end 48 bpp */
+
+         case 64:       /* png_ptr->row_info.pixel_depth */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            register png_uint_32 i;
+            png_uint_32 initial_val = BPP8 * png_pass_start[png_ptr->pass];
+              /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
+            register int stride = BPP8 * png_pass_inc[png_ptr->pass];
+              /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
+            register int rep_bytes = BPP8 * png_pass_width[png_ptr->pass];
+              /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
+            png_uint_32 len = png_ptr->width &~7;  /* reduce to mult of 8 */
+            int diff = (int) (png_ptr->width & 7); /* amount lost */
+            register png_uint_32 final_val = BPP8 * len;   /* GRR bugfix */
+
+            srcptr = png_ptr->row_buf + 1 + initial_val;
+            dstptr = row + initial_val;
+
+            for (i = initial_val; i < final_val; i += stride)
+            {
+               png_memcpy(dstptr, srcptr, rep_bytes);
+               srcptr += stride;
+               dstptr += stride;
+            }
+            if (diff)  /* number of leftover pixels:  3 for pngtest */
+            {
+               final_val += diff*BPP8;
+               for (; i < final_val; i += stride)
+               {
+                  if (rep_bytes > (int)(final_val-i))
+                     rep_bytes = (int)(final_val-i);
+                  png_memcpy(dstptr, srcptr, rep_bytes);
+                  srcptr += stride;
+                  dstptr += stride;
+               }
+            }
+
+            break;
+         }       /* end 64 bpp */
+
+         default: /* png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64 */
+         {
+            /* this should never happen */
+            png_warning(png_ptr, "Invalid row_info.pixel_depth in pngrutil");
+            break;
+         }
+      } /* end switch (png_ptr->row_info.pixel_depth) */
+
+   } /* end if (non-trivial mask) */
+
+} /* end png_combine_row() */
+#endif /* PNG_HAVE_MMX_COMBINE_ROW */
+
+
+
+/*===========================================================================*/
+/*                                                                           */
+/*                 P N G _ D O _ R E A D _ I N T E R L A C E                 */
+/*                                                                           */
+/*===========================================================================*/
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+#if !defined(PNG_HAVE_MMX_READ_INTERLACE)
+
+/* png_do_read_interlace() widens the current pass's row in place, copying
+ * each pixel png_pass_inc[pass] times while walking from the last pixel back
+ * to the first.  It is called after any 16-bit to 8-bit conversion has
+ * taken place.  [GRR: what other steps come before and/or after?] */
+void /* PRIVATE */
+png_do_read_interlace(png_structp png_ptr)
+{
+#if defined(PNG_USE_LOCAL_ARRAYS)
+static PNG_CONST int FARDATA png_pass_inc[7]   = {8, 8, 4, 4, 2, 2, 1};
+#endif
+   png_row_infop row_info = &(png_ptr->row_info);
+   png_bytep row = png_ptr->row_buf + 1;
+   int pass = png_ptr->pass;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+   png_uint_32 transformations = png_ptr->transformations;
+#endif
+   png_debug(1,"in png_do_read_interlace (pngrutil.c OPTIMIZED)\n");
+   /* row_info->width is the pass width here; it is set to final_width below */
+   if (row != NULL && row_info != NULL)
+   {
+      png_uint_32 final_width;
+
+      final_width = row_info->width * png_pass_inc[pass];
+
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_byte v;
+            png_uint_32 i;
+            int j;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 3);
+            dp = row + (png_size_t)((final_width - 1) >> 3);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)((row_info->width + 7) & 7);
+               dshift = (int)((final_width + 7) & 7);
+               s_start = 7;
+               s_end = 0;
+               s_inc = -1;
+            }
+            else
+#endif
+            {
+               sshift = 7 - (int)((row_info->width + 7) & 7);
+               dshift = 7 - (int)((final_width + 7) & 7);
+               s_start = 0;
+               s_end = 7;
+               s_inc = 1;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               v = (png_byte)((*sp >> sshift) & 0x1);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+         case 2:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 2);
+            dp = row + (png_size_t)((final_width - 1) >> 2);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);
+               dshift = (png_size_t)(((final_width + 3) & 3) << 1);
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+            else
+#endif
+            {
+               sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);
+               dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0x3);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+         case 4:
+         {
+            png_bytep sp, dp;
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+
+            sp = row + (png_size_t)((row_info->width - 1) >> 1);
+            dp = row + (png_size_t)((final_width - 1) >> 1);
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);
+               dshift = (png_size_t)(((final_width + 1) & 1) << 2);
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            else
+#endif
+            {
+               sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);
+               dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+
+            for (i = row_info->width; i; i--)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0xf);
+               for (j = 0; j < png_pass_inc[pass]; j++)
+               {
+                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+
+       /*====================================================================*/
+
+         default: /* 8-bit or larger (this is where the routine is modified) */
+         {
+            png_bytep sptr, dp;
+            png_uint_32 i;
+            png_size_t pixel_bytes;
+            int width = (int)row_info->width;
+
+            pixel_bytes = (row_info->pixel_depth >> 3);
+
+            /* point sptr at the last pixel in the pre-expanded row: */
+            sptr = row + (width - 1) * pixel_bytes;
+
+            /* point dp at the last pixel position in the expanded row: */
+            dp = row + (final_width - 1) * pixel_bytes;
+
+            /* MMX not supported:  use modified C code - takes advantage
+             *   of inlining of png_memcpy for a constant */
+            /* GRR 19991007:  does it?  or should pixel_bytes in each
+             *   block be replaced with immediate value (e.g., 1)? */
+            /* GRR 19991017:  replaced with constants in each case */
+            {
+               if (pixel_bytes == 1)
+               {
+                  for (i = width; i; i--)
+                  {
+                     int j;
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        *dp-- = *sptr;
+                     }
+                     --sptr;
+                  }
+               }
+               else if (pixel_bytes == 3)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 3);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 3);
+                        dp -= 3;
+                     }
+                     sptr -= 3;
+                  }
+               }
+               else if (pixel_bytes == 2)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 2);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 2);
+                        dp -= 2;
+                     }
+                     sptr -= 2;
+                  }
+               }
+               else if (pixel_bytes == 4)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 4);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+#if defined(PNG_DEBUG) && defined(PNG_1_0_X)
+                        if (dp < row || dp+3 > row+png_ptr->row_buf_size)
+                        {
+                           /* pointers need %p and a (void *) cast; %d with
+                            * a pointer argument is undefined behaviour */
+                           printf("dp out of bounds: row=%p, dp=%p, rp=%p\n",
+                             (void *)row, (void *)dp,
+                             (void *)(row+png_ptr->row_buf_size));
+                           printf("row_buf=%lu\n",
+                             (unsigned long)png_ptr->row_buf_size);
+                        }
+#endif
+                        png_memcpy(dp, v, 4);
+                        dp -= 4;
+                     }
+                     sptr -= 4;
+                  }
+               }
+               else if (pixel_bytes == 6)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 6);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 6);
+                        dp -= 6;
+                     }
+                     sptr -= 6;
+                  }
+               }
+               else if (pixel_bytes == 8)
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, 8);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, 8);
+                        dp -= 8;
+                     }
+                     sptr -= 8;
+                  }
+               }
+               else     /* GRR:  should never be reached */
+               {
+                  for (i = width; i; i--)
+                  {
+                     png_byte v[8];
+                     int j;
+                     png_memcpy(v, sptr, pixel_bytes);
+                     for (j = 0; j < png_pass_inc[pass]; j++)
+                     {
+                        png_memcpy(dp, v, pixel_bytes);
+                        dp -= pixel_bytes;
+                     }
+                     sptr -= pixel_bytes;
+                  }
+               }
+            }
+            break;
+         }
+      } /* end switch (row_info->pixel_depth) */
+
+      row_info->width = final_width;
+
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
+   }
+} /* end png_do_read_interlace() */
+
+#endif /* PNG_HAVE_MMX_READ_INTERLACE */
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+
+
+
+#if !defined(PNG_HAVE_MMX_READ_FILTER_ROW)
+/*===========================================================================*/
+/*                                                                           */
+/*                   P N G _ R E A D _ F I L T E R _ R O W                   */
+/*                                                                           */
+/*===========================================================================*/
+
+
+/* Optimized png_read_filter_row routine:  reverses the row filter selected
+ * by `filter' on `row', in place; prev_row supplies the bytes of the row above */
+void /* PRIVATE */
+png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
+   row, png_bytep prev_row, int filter)
+{
+#if defined(PNG_DEBUG)
+   char filnm[10];
+#endif
+   /* NOTE(review): the "-x86" suffixes in the debug labels below look like
+    * leftovers from the x86/MMX sources; this routine is plain C - confirm. */
+#if defined(PNG_DEBUG)
+   png_debug(1, "in png_read_filter_row (pngrutil.c OPTIMIZED)\n");
+   switch (filter)
+   {
+      case 0:
+         png_snprintf(filnm, 10, "none");
+         break;
+
+      case 1:
+         png_snprintf(filnm, 10, "sub-%s",
+             "x86");
+         break;
+
+      case 2:
+         png_snprintf(filnm, 10, "up-%s",
+             "x86");
+         break;
+
+      case 3:
+         png_snprintf(filnm, 10, "avg-%s",
+             "x86");
+         break;
+
+      case 4:
+         png_snprintf(filnm, 10, "Paeth-%s",
+             "x86");
+         break;
+
+      default:
+         png_snprintf(filnm, 10, "unknown");
+         break;
+   }
+   png_debug2(0, "row_number=%5ld, %10s, ", png_ptr->row_number, filnm);
+   png_debug1(0, "row=0x%08lx, ", (unsigned long)row);
+   png_debug2(0, "pixdepth=%2d, bytes=%d, ", (int)row_info->pixel_depth,
+      (int)((row_info->pixel_depth + 7) >> 3));
+   png_debug1(0,"rowbytes=%8ld\n", row_info->rowbytes);
+#endif /* PNG_DEBUG */
+   /* SUB, UP, AVG and PAETH each add a predictor to the row bytes, mod 256 */
+   switch (filter)
+   {
+      case PNG_FILTER_VALUE_NONE:
+         break;
+
+      case PNG_FILTER_VALUE_SUB:
+         {
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+            png_bytep rp = row + bpp;
+            png_bytep lp = row;
+
+            for (i = bpp; i < istop; i++)
+            {
+               *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
+               rp++;
+            }
+         }
+         break;
+
+      case PNG_FILTER_VALUE_UP:
+         {
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+            png_bytep rp = row;
+            png_bytep pp = prev_row;
+
+            for (i = 0; i < istop; ++i)
+            {
+               *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+               rp++;
+            }
+         }
+         break;
+
+      case PNG_FILTER_VALUE_AVG:
+         {
+            png_uint_32 i;
+            png_bytep rp = row;
+            png_bytep pp = prev_row;
+            png_bytep lp = row;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+            png_uint_32 istop = row_info->rowbytes - bpp;
+            /* first bpp bytes have no left neighbor:  add half of "up" only */
+            for (i = 0; i < bpp; i++)
+            {
+               *rp = (png_byte)(((int)(*rp) +
+                  ((int)(*pp++) >> 1)) & 0xff);
+               rp++;
+            }
+
+            for (i = 0; i < istop; i++)
+            {
+               *rp = (png_byte)(((int)(*rp) +
+                  ((int)(*pp++ + *lp++) >> 1)) & 0xff);
+               rp++;
+            }
+         }
+         break;
+
+      case PNG_FILTER_VALUE_PAETH:
+         {
+            png_uint_32 i;
+            png_bytep rp = row;
+            png_bytep pp = prev_row;
+            png_bytep lp = row;
+            png_bytep cp = prev_row;
+            png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+            png_uint_32 istop = row_info->rowbytes - bpp;
+            /* first bpp bytes have no left neighbor:  add "up" only */
+            for (i = 0; i < bpp; i++)
+            {
+               *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+               rp++;
+            }
+
+            for (i = 0; i < istop; i++)   /* use leftover rp,pp */
+            {
+               int a, b, c, pa, pb, pc, p;
+
+               a = *lp++;
+               b = *pp++;
+               c = *cp++;
+
+               p = b - c;
+               pc = a - c;
+
+#if defined(PNG_USE_ABS)
+               pa = abs(p);
+               pb = abs(pc);
+               pc = abs(p + pc);
+#else
+               pa = p < 0 ? -p : p;
+               pb = pc < 0 ? -pc : pc;
+               pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+               /*
+                  if (pa <= pb && pa <= pc)
+                     p = a;
+                  else if (pb <= pc)
+                     p = b;
+                  else
+                     p = c;
+                */
+
+               p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b : c;
+
+               *rp = (png_byte)(((int)(*rp) + p) & 0xff);
+               rp++;
+            }
+         }
+         break;
+
+      default:
+         png_warning(png_ptr, "Ignoring bad row-filter type");
+         *row=0;
+         break;
+   }
+}
+
+#endif /* PNG_HAVE_MMX_READ_FILTER_ROW */
+#endif /* PNG_OPTIMIZED_CODE_SUPPORTED */
+
+#if !defined(PNG_USE_PNGGCCRD) && !defined(PNG_USE_PNGVCRD)
+#if !defined(PNG_OPTIMIZED_CODE_SUPPORTED)
+/* Use the unoptimized original C code.  This might be removed from a future
+ * version of libpng if testing proves it to be worthless. */
+void /* PRIVATE */
+png_combine_row(png_structp png_ptr, png_bytep row, int mask)
+{
+   png_debug(1,"in png_combine_row NOT OPTIMIZED\n");
    if (mask == 0xff)
    {
       png_memcpy(row, png_ptr->row_buf + 1,
@@ -2456,10 +3512,8 @@
       }
    }
 }
-#endif /* !PNG_HAVE_MMX_COMBINE_ROW */
 
 #ifdef PNG_READ_INTERLACING_SUPPORTED
-#ifndef PNG_HAVE_MMX_READ_INTERLACE   /* else in pngvcrd.c, pnggccrd.c */
 /* OLD pre-1.0.9 interface:
 void png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
    png_uint_32 transformations)
@@ -2474,10 +3528,10 @@
 #ifdef PNG_USE_LOCAL_ARRAYS
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
    /* offset to next interlace block */
-   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 #endif
 
-   png_debug(1,"in png_do_read_interlace (stock C version)\n");
+   png_debug(1,"in png_do_read_interlace (pngrutil.c NOT OPTIMIZED)\n");
    if (row != NULL && row_info != NULL)
    {
       png_uint_32 final_width;
@@ -2681,19 +3735,16 @@
       row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
    }
 #if !defined(PNG_READ_PACKSWAP_SUPPORTED)
-   if (&transformations == NULL) /* silence compiler warning */
-      return;
+   transformations = transformations; /* silence compiler warning */
 #endif
 }
-#endif /* !PNG_HAVE_MMX_READ_INTERLACE */
 #endif /* PNG_READ_INTERLACING_SUPPORTED */
 
-#ifndef PNG_HAVE_MMX_READ_FILTER_ROW
 void /* PRIVATE */
 png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep row,
    png_bytep prev_row, int filter)
 {
-   png_debug(1, "in png_read_filter_row\n");
+   png_debug(1, "in png_read_filter_row (NOT OPTIMIZED)\n");
    png_debug2(2,"row = %lu, filter = %d\n", png_ptr->row_number, filter);
    switch (filter)
    {
@@ -2811,7 +3862,8 @@
          break;
    }
 }
-#endif /* !PNG_HAVE_MMX_READ_FILTER_ROW */
+#endif /* !PNG_OPTIMIZED_CODE_SUPPORTED */
+#endif /* !PNG_USE_PNGGCCRD && !PNG_USE_PNGVCRD */
 
 void /* PRIVATE */
 png_read_finish_row(png_structp png_ptr)
@@ -2820,16 +3872,16 @@
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
 
    /* start of interlace block */
-   const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
 
    /* offset to next interlace block */
-   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 
    /* start of interlace block in the y direction */
-   const int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
 
    /* offset to next interlace block in the y direction */
-   const int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
 
    png_debug(1, "in png_read_finish_row\n");
@@ -2840,7 +3892,8 @@
    if (png_ptr->interlaced)
    {
       png_ptr->row_number = 0;
-      png_memset_check(png_ptr, png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+      png_memset_check(png_ptr, png_ptr->prev_row, 0,
+         png_ptr->rowbytes + 1);
       do
       {
          png_ptr->pass++;
@@ -2874,7 +3927,7 @@
    if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
    {
 #ifdef PNG_USE_LOCAL_ARRAYS
-      PNG_IDAT;
+      PNG_CONST PNG_IDAT;
 #endif
       char extra;
       int ret;
@@ -2947,16 +4000,16 @@
    /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
 
    /* start of interlace block */
-   const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
 
    /* offset to next interlace block */
-   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 
    /* start of interlace block in the y direction */
-   const int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
 
    /* offset to next interlace block in the y direction */
-   const int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
 #endif
 
    int max_pixel_depth;
@@ -3105,7 +4158,7 @@
 #endif
    png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes+64);
    png_ptr->row_buf = png_ptr->big_row_buf+32;
-#if defined(PNG_DEBUG) && defined(PNG_USE_PNGGCCRD)
+#if defined(PNG_DEBUG) && defined(PNG_USE_PNGGCCRD) && defined(PNG_1_0_X)
    png_ptr->row_buf_size = row_bytes;
 #endif
 
diff --git a/pngset.c b/pngset.c
index f1a1ef6..d433331 100644
--- a/pngset.c
+++ b/pngset.c
@@ -353,7 +353,7 @@
    info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
 
    /* check for potential overflow */
-   if ( width > (PNG_UINT_32_MAX
+   if (width > (PNG_UINT_32_MAX
                  >> 3)      /* 8-byte RGBA pixels */
                  - 64       /* bigrowbuf hack */
                  - 1        /* filter byte */
@@ -484,7 +484,8 @@
    info_ptr->scal_s_width = (png_charp)png_malloc_warn(png_ptr, length);
    if (info_ptr->scal_s_width == NULL)
    {
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      png_warning(png_ptr,
+       "Memory allocation failed while processing sCAL.");
    }
    png_memcpy(info_ptr->scal_s_width, swidth, (png_size_t)length);
 
@@ -494,7 +495,8 @@
    if (info_ptr->scal_s_height == NULL)
    {
       png_free (png_ptr, info_ptr->scal_s_width);
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      png_warning(png_ptr,
+       "Memory allocation failed while processing sCAL.");
    }
    png_memcpy(info_ptr->scal_s_height, sheight, (png_size_t)length);
 
@@ -688,7 +690,7 @@
       png_warning(png_ptr, "Insufficient memory to process iCCP chunk.");
       return;
    }
-   png_strcpy(new_iccp_name, name);
+   png_strncpy(new_iccp_name, name, png_strlen(name) + 1); /* with NUL */
    new_iccp_profile = (png_charp)png_malloc_warn(png_ptr, proflen);
    if (new_iccp_profile == NULL)
    {
@@ -973,15 +975,27 @@
         png_sPLT_tp to = np + info_ptr->splt_palettes_num + i;
         png_sPLT_tp from = entries + i;
 
-        to->name = (png_charp)png_malloc(png_ptr,
-            png_strlen(from->name) + 1);
-        /* TODO: use png_malloc_warn */
-        png_strcpy(to->name, from->name);
-        to->entries = (png_sPLT_entryp)png_malloc(png_ptr,
+        to->name = (png_charp)png_malloc_warn(png_ptr,
+          png_strlen(from->name) + 1);   /* +1 for the terminating NUL */
+        if (to->name == NULL)
+        {
+           png_warning(png_ptr,
+             "Out of memory while processing sPLT chunk");
+           continue;  /* no buffer to copy the name into */
+        }
+        png_strncpy(to->name, from->name, png_strlen(from->name) + 1);
+        to->entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
             from->nentries * png_sizeof(png_sPLT_entry));
-        /* TODO: use png_malloc_warn */
+        if (to->entries == NULL)   /* check before copying into it */
+        {
+           png_warning(png_ptr,
+             "Out of memory while processing sPLT chunk");
+           png_free(png_ptr,to->name);
+           to->name = NULL;
+           continue;  /* name released; leave this palette unset */
+        }
         png_memcpy(to->entries, from->entries,
             from->nentries * png_sizeof(png_sPLT_entry));
         to->nentries = from->nentries;
         to->depth = from->depth;
     }
@@ -1011,7 +1025,8 @@
         png_sizeof(png_unknown_chunk));
     if (np == NULL)
     {
-       png_warning(png_ptr, "Out of memory while processing unknown chunk.");
+       png_warning(png_ptr,
+          "Out of memory while processing unknown chunk.");
        return;
     }
 
@@ -1029,7 +1044,8 @@
         to->data = (png_bytep)png_malloc_warn(png_ptr, from->size);
         if (to->data == NULL)
         {
-           png_warning(png_ptr, "Out of memory processing unknown chunk.");
+           png_warning(png_ptr,
+              "Out of memory while processing unknown chunk.");
         }
         else
         {
diff --git a/pngtest.c b/pngtest.c
index 25836ff..e3703d0 100644
--- a/pngtest.c
+++ b/pngtest.c
@@ -434,8 +434,9 @@
       png_error(png_ptr, "Write Error");
    }
 }
-
 #endif /* USE_FAR_KEYWORD */
+#endif /* PNG_NO_STDIO */
+/* END of code to validate stdio-free compilation */
 
 /* This function is called when there is a warning, but the library thinks
  * it can continue anyway.  Replacement functions don't have to do anything
@@ -463,8 +464,6 @@
    /* We can return because png_error calls the default handler, which is
     * actually OK in this case. */
 }
-#endif /* PNG_NO_STDIO */
-/* END of code to validate stdio-free compilation */
 
 /* START of code to validate memory allocation and deallocation */
 #if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
@@ -658,10 +657,8 @@
    read_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
       png_error_ptr_NULL, png_error_ptr_NULL);
 #endif
-#if defined(PNG_NO_STDIO)
    png_set_error_fn(read_ptr, (png_voidp)inname, pngtest_error,
        pngtest_warning);
-#endif
 #ifdef PNG_WRITE_SUPPORTED
 #if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
    write_ptr = png_create_write_struct_2(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
@@ -671,11 +668,9 @@
    write_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, png_voidp_NULL,
       png_error_ptr_NULL, png_error_ptr_NULL);
 #endif
-#if defined(PNG_NO_STDIO)
    png_set_error_fn(write_ptr, (png_voidp)inname, pngtest_error,
        pngtest_warning);
 #endif
-#endif
    png_debug(0, "Allocating read_info, write_info and end_info structures\n");
    read_info_ptr = png_create_info_struct(read_ptr);
    end_info_ptr = png_create_info_struct(read_ptr);
@@ -1007,10 +1002,11 @@
       {
          png_set_tIME(write_ptr, write_info_ptr, mod_time);
 #if defined(PNG_TIME_RFC1123_SUPPORTED)
-         /* we have to use png_strcpy instead of "=" because the string
+         /* we have to use png_strncpy instead of "=" because the string
             pointed to by png_convert_to_rfc1123() gets free'ed before
             we use it */
-         png_strcpy(tIME_string,png_convert_to_rfc1123(read_ptr, mod_time));
+         png_strncpy(tIME_string,png_convert_to_rfc1123(read_ptr,
+            mod_time),30);
          tIME_chunk_present++;
 #endif /* PNG_TIME_RFC1123_SUPPORTED */
       }
@@ -1147,10 +1143,11 @@
       {
          png_set_tIME(write_ptr, write_end_info_ptr, mod_time);
 #if defined(PNG_TIME_RFC1123_SUPPORTED)
-         /* we have to use png_strcpy instead of "=" because the string
+         /* we have to use png_strncpy instead of "=" because the string
             pointed to by png_convert_to_rfc1123() gets free'ed before
             we use it */
-         png_strcpy(tIME_string,png_convert_to_rfc1123(read_ptr, mod_time));
+         png_strncpy(tIME_string,png_convert_to_rfc1123(read_ptr,
+            mod_time),30);
          tIME_chunk_present++;
 #endif /* PNG_TIME_RFC1123_SUPPORTED */
       }
@@ -1551,4 +1548,4 @@
 }
 
 /* Generate a compiler error if there is an old png.h in the search path. */
-typedef version_1_0_26 your_png_h_is_not_version_1_0_26;
+typedef version_1_0_27rc1 your_png_h_is_not_version_1_0_27rc1;
diff --git a/pngtrans.c b/pngtrans.c
index 8cb4b88..1640095 100644
--- a/pngtrans.c
+++ b/pngtrans.c
@@ -1,9 +1,9 @@
 
 /* pngtrans.c - transforms the data in a row (used by both readers and writers)
  *
- * Last changed in libpng 1.2.13 November 13, 2006
+ * Last changed in libpng 1.2.17 May 15, 2007
  * For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
  */
@@ -252,7 +252,7 @@
 #endif
 
 #if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-const static PNG_CONST png_byte onebppswaptable[256] = {
+static PNG_CONST png_byte onebppswaptable[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
    0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
@@ -287,7 +287,7 @@
    0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
 };
 
-const static PNG_CONST png_byte twobppswaptable[256] = {
+static PNG_CONST png_byte twobppswaptable[256] = {
    0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0,
    0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
    0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4,
@@ -322,7 +322,7 @@
    0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
 };
 
-const static PNG_CONST png_byte fourbppswaptable[256] = {
+static PNG_CONST png_byte fourbppswaptable[256] = {
    0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
    0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0,
    0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
diff --git a/pngvcrd.c b/pngvcrd.c
index d1c6405..8845936 100644
--- a/pngvcrd.c
+++ b/pngvcrd.c
@@ -3,9 +3,9 @@
  *
  * For Intel x86 CPU and Microsoft Visual C++ compiler
  *
- * Last changed in libpng 1.2.6 - August 15, 2004
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2004 Glenn Randers-Pehrson
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * Copyright (c) 1998, Intel Corporation
  *
  * Contributed by Nirav Chhatrapati, Intel Corporation, 1998
@@ -22,12 +22,15 @@
  *
  * [runtime MMX configuration, GRR 20010102]
  *
+ * [Copy 6 bytes per pixel, not 4, and use stride of 6, not 4, in the
+ *  second loop of interlace processing of 48-bit pixels, GR-P 20070717]
  */
 
 #define PNG_INTERNAL
 #include "png.h"
 
-#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
+#if defined(PNG_MMX_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
+
 
 static int mmx_supported=2;
 
@@ -110,7 +113,7 @@
 png_combine_row(png_structp png_ptr, png_bytep row, int mask)
 {
 #ifdef PNG_USE_LOCAL_ARRAYS
-   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 #endif
 
    png_debug(1,"in png_combine_row_asm\n");
@@ -135,394 +138,6 @@
    {
       switch (png_ptr->row_info.pixel_depth)
       {
-         case 1:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int s_inc, s_start, s_end;
-            int m;
-            int shift;
-            png_uint_32 i;
-
-            sp = png_ptr->row_buf + 1;
-            dp = row;
-            m = 0x80;
-#if defined(PNG_READ_PACKSWAP_SUPPORTED)
-            if (png_ptr->transformations & PNG_PACKSWAP)
-            {
-                s_start = 0;
-                s_end = 7;
-                s_inc = 1;
-            }
-            else
-#endif
-            {
-                s_start = 7;
-                s_end = 0;
-                s_inc = -1;
-            }
-
-            shift = s_start;
-
-            for (i = 0; i < png_ptr->width; i++)
-            {
-               if (m & mask)
-               {
-                  int value;
-
-                  value = (*sp >> shift) & 0x1;
-                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
-                  *dp |= (png_byte)(value << shift);
-               }
-
-               if (shift == s_end)
-               {
-                  shift = s_start;
-                  sp++;
-                  dp++;
-               }
-               else
-                  shift += s_inc;
-
-               if (m == 1)
-                  m = 0x80;
-               else
-                  m >>= 1;
-            }
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int s_start, s_end, s_inc;
-            int m;
-            int shift;
-            png_uint_32 i;
-            int value;
-
-            sp = png_ptr->row_buf + 1;
-            dp = row;
-            m = 0x80;
-#if defined(PNG_READ_PACKSWAP_SUPPORTED)
-            if (png_ptr->transformations & PNG_PACKSWAP)
-            {
-               s_start = 0;
-               s_end = 6;
-               s_inc = 2;
-            }
-            else
-#endif
-            {
-               s_start = 6;
-               s_end = 0;
-               s_inc = -2;
-            }
-
-            shift = s_start;
-
-            for (i = 0; i < png_ptr->width; i++)
-            {
-               if (m & mask)
-               {
-                  value = (*sp >> shift) & 0x3;
-                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
-                  *dp |= (png_byte)(value << shift);
-               }
-
-               if (shift == s_end)
-               {
-                  shift = s_start;
-                  sp++;
-                  dp++;
-               }
-               else
-                  shift += s_inc;
-               if (m == 1)
-                  m = 0x80;
-               else
-                  m >>= 1;
-            }
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int s_start, s_end, s_inc;
-            int m;
-            int shift;
-            png_uint_32 i;
-            int value;
-
-            sp = png_ptr->row_buf + 1;
-            dp = row;
-            m = 0x80;
-#if defined(PNG_READ_PACKSWAP_SUPPORTED)
-            if (png_ptr->transformations & PNG_PACKSWAP)
-            {
-               s_start = 0;
-               s_end = 4;
-               s_inc = 4;
-            }
-            else
-#endif
-            {
-               s_start = 4;
-               s_end = 0;
-               s_inc = -4;
-            }
-            shift = s_start;
-
-            for (i = 0; i < png_ptr->width; i++)
-            {
-               if (m & mask)
-               {
-                  value = (*sp >> shift) & 0xf;
-                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
-                  *dp |= (png_byte)(value << shift);
-               }
-
-               if (shift == s_end)
-               {
-                  shift = s_start;
-                  sp++;
-                  dp++;
-               }
-               else
-                  shift += s_inc;
-               if (m == 1)
-                  m = 0x80;
-               else
-                  m >>= 1;
-            }
-            break;
-         }
-
-         case 8:
-         {
-            png_bytep srcptr;
-            png_bytep dstptr;
-            png_uint_32 len;
-            int m;
-            int diff, unmask;
-
-            __int64 mask0=0x0102040810204080;
-
-#if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && mmx_supported */ )
-#else
-            if (mmx_supported)
-#endif
-            {
-               srcptr = png_ptr->row_buf + 1;
-               dstptr = row;
-               m = 0x80;
-               unmask = ~mask;
-               len  = png_ptr->width &~7;  //reduce to multiple of 8
-               diff = png_ptr->width & 7;  //amount lost
-
-               _asm
-               {
-                  movd       mm7, unmask   //load bit pattern
-                  psubb      mm6,mm6       //zero mm6
-                  punpcklbw  mm7,mm7
-                  punpcklwd  mm7,mm7
-                  punpckldq  mm7,mm7       //fill register with 8 masks
-
-                  movq       mm0,mask0
-
-                  pand       mm0,mm7       //nonzero if keep byte
-                  pcmpeqb    mm0,mm6       //zeros->1s, v versa
-
-                  mov        ecx,len       //load length of line (pixels)
-                  mov        esi,srcptr    //load source
-                  mov        ebx,dstptr    //load dest
-                  cmp        ecx,0         //lcr
-                  je         mainloop8end
-
-mainloop8:
-                  movq       mm4,[esi]
-                  pand       mm4,mm0
-                  movq       mm6,mm0
-                  pandn      mm6,[ebx]
-                  por        mm4,mm6
-                  movq       [ebx],mm4
-
-                  add        esi,8         //inc by 8 bytes processed
-                  add        ebx,8
-                  sub        ecx,8         //dec by 8 pixels processed
-
-                  ja         mainloop8
-mainloop8end:
-
-                  mov        ecx,diff
-                  cmp        ecx,0
-                  jz         end8
-
-                  mov        edx,mask
-                  sal        edx,24        //make low byte the high byte
-
-secondloop8:
-                  sal        edx,1         //move high bit to CF
-                  jnc        skip8         //if CF = 0
-                  mov        al,[esi]
-                  mov        [ebx],al
-skip8:
-                  inc        esi
-                  inc        ebx
-
-                  dec        ecx
-                  jnz        secondloop8
-end8:
-                  emms
-               }
-            }
-            else /* mmx not supported - use modified C routine */
-            {
-               register unsigned int incr1, initial_val, final_val;
-               png_size_t pixel_bytes;
-               png_uint_32 i;
-               register int disp = png_pass_inc[png_ptr->pass];
-               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
-
-               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
-               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
-                  pixel_bytes;
-               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
-               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
-               final_val = png_ptr->width*pixel_bytes;
-               incr1 = (disp)*pixel_bytes;
-               for (i = initial_val; i < final_val; i += incr1)
-               {
-                  png_memcpy(dstptr, srcptr, pixel_bytes);
-                  srcptr += incr1;
-                  dstptr += incr1;
-               }
-            } /* end of else */
-
-            break;
-         }       // end 8 bpp
-
-         case 16:
-         {
-            png_bytep srcptr;
-            png_bytep dstptr;
-            png_uint_32 len;
-            int unmask, diff;
-            __int64 mask1=0x0101020204040808,
-                    mask0=0x1010202040408080;
-
-#if !defined(PNG_1_0_X)
-            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
-                /* && mmx_supported */ )
-#else
-            if (mmx_supported)
-#endif
-            {
-               srcptr = png_ptr->row_buf + 1;
-               dstptr = row;
-
-               unmask = ~mask;
-               len     = (png_ptr->width)&~7;
-               diff = (png_ptr->width)&7;
-               _asm
-               {
-                  movd       mm7, unmask       //load bit pattern
-                  psubb      mm6,mm6           //zero mm6
-                  punpcklbw  mm7,mm7
-                  punpcklwd  mm7,mm7
-                  punpckldq  mm7,mm7           //fill register with 8 masks
-
-                  movq       mm0,mask0
-                  movq       mm1,mask1
-
-                  pand       mm0,mm7
-                  pand       mm1,mm7
-
-                  pcmpeqb    mm0,mm6
-                  pcmpeqb    mm1,mm6
-
-                  mov        ecx,len           //load length of line
-                  mov        esi,srcptr        //load source
-                  mov        ebx,dstptr        //load dest
-                  cmp        ecx,0             //lcr
-                  jz         mainloop16end
-
-mainloop16:
-                  movq       mm4,[esi]
-                  pand       mm4,mm0
-                  movq       mm6,mm0
-                  movq       mm7,[ebx]
-                  pandn      mm6,mm7
-                  por        mm4,mm6
-                  movq       [ebx],mm4
-
-                  movq       mm5,[esi+8]
-                  pand       mm5,mm1
-                  movq       mm7,mm1
-                  movq       mm6,[ebx+8]
-                  pandn      mm7,mm6
-                  por        mm5,mm7
-                  movq       [ebx+8],mm5
-
-                  add        esi,16            //inc by 16 bytes processed
-                  add        ebx,16
-                  sub        ecx,8             //dec by 8 pixels processed
-
-                  ja         mainloop16
-
-mainloop16end:
-                  mov        ecx,diff
-                  cmp        ecx,0
-                  jz         end16
-
-                  mov        edx,mask
-                  sal        edx,24            //make low byte the high byte
-secondloop16:
-                  sal        edx,1             //move high bit to CF
-                  jnc        skip16            //if CF = 0
-                  mov        ax,[esi]
-                  mov        [ebx],ax
-skip16:
-                  add        esi,2
-                  add        ebx,2
-
-                  dec        ecx
-                  jnz        secondloop16
-end16:
-                  emms
-               }
-            }
-            else /* mmx not supported - use modified C routine */
-            {
-               register unsigned int incr1, initial_val, final_val;
-               png_size_t pixel_bytes;
-               png_uint_32 i;
-               register int disp = png_pass_inc[png_ptr->pass];
-               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
-
-               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
-               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
-                  pixel_bytes;
-               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
-               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
-               final_val = png_ptr->width*pixel_bytes;
-               incr1 = (disp)*pixel_bytes;
-               for (i = initial_val; i < final_val; i += incr1)
-               {
-                  png_memcpy(dstptr, srcptr, pixel_bytes);
-                  srcptr += incr1;
-                  dstptr += incr1;
-               }
-            } /* end of else */
-
-            break;
-         }       // end 16 bpp
-
          case 24:
          {
             png_bytep srcptr;
@@ -802,6 +417,394 @@
             break;
          }       // end 32 bpp
 
+         case 8:
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            png_uint_32 len;
+            int m;
+            int diff, unmask;
+
+            __int64 mask0=0x0102040810204080;
+
+#if !defined(PNG_1_0_X)
+            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
+                /* && mmx_supported */ )
+#else
+            if (mmx_supported)
+#endif
+            {
+               srcptr = png_ptr->row_buf + 1;
+               dstptr = row;
+               m = 0x80;
+               unmask = ~mask;
+               len  = png_ptr->width &~7;  //reduce to multiple of 8
+               diff = png_ptr->width & 7;  //amount lost
+
+               _asm
+               {
+                  movd       mm7, unmask   //load bit pattern
+                  psubb      mm6,mm6       //zero mm6
+                  punpcklbw  mm7,mm7
+                  punpcklwd  mm7,mm7
+                  punpckldq  mm7,mm7       //fill register with 8 masks
+
+                  movq       mm0,mask0
+
+                  pand       mm0,mm7       //nonzero if keep byte
+                  pcmpeqb    mm0,mm6       //zeros->1s, v versa
+
+                  mov        ecx,len       //load length of line (pixels)
+                  mov        esi,srcptr    //load source
+                  mov        ebx,dstptr    //load dest
+                  cmp        ecx,0         //lcr
+                  je         mainloop8end
+
+mainloop8:
+                  movq       mm4,[esi]
+                  pand       mm4,mm0
+                  movq       mm6,mm0
+                  pandn      mm6,[ebx]
+                  por        mm4,mm6
+                  movq       [ebx],mm4
+
+                  add        esi,8         //inc by 8 bytes processed
+                  add        ebx,8
+                  sub        ecx,8         //dec by 8 pixels processed
+
+                  ja         mainloop8
+mainloop8end:
+
+                  mov        ecx,diff
+                  cmp        ecx,0
+                  jz         end8
+
+                  mov        edx,mask
+                  sal        edx,24        //make low byte the high byte
+
+secondloop8:
+                  sal        edx,1         //move high bit to CF
+                  jnc        skip8         //if CF = 0
+                  mov        al,[esi]
+                  mov        [ebx],al
+skip8:
+                  inc        esi
+                  inc        ebx
+
+                  dec        ecx
+                  jnz        secondloop8
+end8:
+                  emms
+               }
+            }
+            else /* mmx not supported - use modified C routine */
+            {
+               register unsigned int incr1, initial_val, final_val;
+               png_size_t pixel_bytes;
+               png_uint_32 i;
+               register int disp = png_pass_inc[png_ptr->pass];
+               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0};
+
+               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
+               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
+                  pixel_bytes;
+               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
+               initial_val = offset_table[png_ptr->pass]*pixel_bytes;
+               final_val = png_ptr->width*pixel_bytes;
+               incr1 = (disp)*pixel_bytes;
+               for (i = initial_val; i < final_val; i += incr1)
+               {
+                  png_memcpy(dstptr, srcptr, pixel_bytes);
+                  srcptr += incr1;
+                  dstptr += incr1;
+               }
+            } /* end of else */
+
+            break;
+         }       // end 8 bpp
+
+         case 1:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int s_inc, s_start, s_end;
+            int m;
+            int shift;
+            png_uint_32 i;
+
+            sp = png_ptr->row_buf + 1;
+            dp = row;
+            m = 0x80;
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+            else
+#endif
+            {
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)
+               {
+                  int value;
+
+                  value = (*sp >> shift) & 0x1;
+                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 2:   /* 2 bpp: merge mask-selected 2-bit pixels into row */
+         {
+            png_bytep sp;               /* read cursor in the source row */
+            png_bytep dp;               /* write cursor in the destination row */
+            int s_start, s_end, s_inc;  /* first/last bit shift in a byte, step */
+            int m;                      /* bit of mask tested for this pixel */
+            int shift;                  /* bit offset of the current pixel */
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;  /* skip byte 0 (presumably filter byte) */
+            dp = row;
+            m = 0x80;                   /* pixel 0 is governed by mask bit 7 */
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;             /* swapped: first pixel in bits 1..0 */
+               s_end = 6;
+               s_inc = 2;
+            }
+            else
+#endif
+            {
+               s_start = 6;             /* default: first pixel in bits 7..6 */
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)            /* copy only pixels whose mask bit is set */
+               {
+                  value = (*sp >> shift) & 0x3;   /* extract source pixel */
+                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); /* clear field */
+                  *dp |= (png_byte)(value << shift);   /* insert source pixel */
+               }
+
+               if (shift == s_end)      /* byte exhausted after four pixels */
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)              /* mask pattern repeats every 8 pixels */
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 4:   /* 4 bpp: merge mask-selected 4-bit pixels into row */
+         {
+            png_bytep sp;               /* read cursor in the source row */
+            png_bytep dp;               /* write cursor in the destination row */
+            int s_start, s_end, s_inc;  /* first/last bit shift in a byte, step */
+            int m;                      /* bit of mask tested for this pixel */
+            int shift;                  /* bit offset of the current pixel */
+            png_uint_32 i;
+            int value;
+
+            sp = png_ptr->row_buf + 1;  /* skip byte 0 (presumably filter byte) */
+            dp = row;
+            m = 0x80;                   /* pixel 0 is governed by mask bit 7 */
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;             /* swapped: first pixel in the low nibble */
+               s_end = 4;
+               s_inc = 4;
+            }
+            else
+#endif
+            {
+               s_start = 4;             /* default: first pixel in the high nibble */
+               s_end = 0;
+               s_inc = -4;
+            }
+            shift = s_start;
+
+            for (i = 0; i < png_ptr->width; i++)
+            {
+               if (m & mask)            /* copy only pixels whose mask bit is set */
+               {
+                  value = (*sp >> shift) & 0xf;   /* extract source nibble */
+                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); /* clear nibble */
+                  *dp |= (png_byte)(value << shift);   /* insert source nibble */
+               }
+
+               if (shift == s_end)      /* byte exhausted after two pixels */
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)              /* mask pattern repeats every 8 pixels */
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+
+         case 16:   /* 16 bpp: merge mask-selected 2-byte pixels into row */
+         {
+            png_bytep srcptr;
+            png_bytep dstptr;
+            png_uint_32 len;
+            int unmask, diff;
+            __int64 mask1=0x0101020204040808,  /* mask bits for pixels 4..7 */
+                    mask0=0x1010202040408080;  /* mask bits for pixels 0..3 */
+
+#if !defined(PNG_1_0_X)
+            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
+                /* && mmx_supported */ )
+#else
+            if (mmx_supported)
+#endif
+            {
+               srcptr = png_ptr->row_buf + 1;   /* skip byte 0 of the row buffer */
+               dstptr = row;
+
+               unmask = ~mask;                  /* inverted so zero means "copy" */
+               len     = (png_ptr->width)&~7;   /* pixels in whole groups of 8 */
+               diff = (png_ptr->width)&7;       /* leftover pixels for the tail loop */
+               _asm
+               {
+                  movd       mm7, unmask       //load bit pattern
+                  psubb      mm6,mm6           //zero mm6
+                  punpcklbw  mm7,mm7
+                  punpcklwd  mm7,mm7
+                  punpckldq  mm7,mm7           //fill register with 8 masks
+
+                  movq       mm0,mask0         //bit selectors, pixels 0-3
+                  movq       mm1,mask1         //bit selectors, pixels 4-7
+
+                  pand       mm0,mm7           //nonzero where mask bit is 0
+                  pand       mm1,mm7
+
+                  pcmpeqb    mm0,mm6           //0xFF bytes where mask bit is 1
+                  pcmpeqb    mm1,mm6
+
+                  mov        ecx,len           //load length of line
+                  mov        esi,srcptr        //load source
+                  mov        ebx,dstptr        //load dest
+                  cmp        ecx,0             //lcr
+                  jz         mainloop16end
+
+mainloop16:
+                  movq       mm4,[esi]         //8 source bytes (pixels 0-3)
+                  pand       mm4,mm0           //keep source where selected
+                  movq       mm6,mm0
+                  movq       mm7,[ebx]         //8 dest bytes
+                  pandn      mm6,mm7           //keep dest where not selected
+                  por        mm4,mm6           //merge the two
+                  movq       [ebx],mm4         //store back to dest
+
+                  movq       mm5,[esi+8]       //same again for pixels 4-7
+                  pand       mm5,mm1
+                  movq       mm7,mm1
+                  movq       mm6,[ebx+8]
+                  pandn      mm7,mm6
+                  por        mm5,mm7
+                  movq       [ebx+8],mm5
+
+                  add        esi,16            //inc by 16 bytes processed
+                  add        ebx,16
+                  sub        ecx,8             //dec by 8 pixels processed
+
+                  ja         mainloop16        //loop while ecx is above 0
+
+mainloop16end:
+                  mov        ecx,diff          //leftover pixel count
+                  cmp        ecx,0
+                  jz         end16
+
+                  mov        edx,mask
+                  sal        edx,24            //make low byte the high byte
+secondloop16:
+                  sal        edx,1             //move high bit to CF
+                  jnc        skip16            //if CF = 0
+                  mov        ax,[esi]          //copy one 2-byte pixel
+                  mov        [ebx],ax
+skip16:
+                  add        esi,2             //advance one pixel
+                  add        ebx,2
+
+                  dec        ecx
+                  jnz        secondloop16
+end16:
+                  emms                         //clear MMX state
+               }
+            }
+            else /* mmx not supported - use modified C routine */
+            {  /* NOTE: pixel pattern comes from png_ptr->pass; mask is not read */
+               register unsigned int incr1, initial_val, final_val;
+               png_size_t pixel_bytes;
+               png_uint_32 i;
+               register int disp = png_pass_inc[png_ptr->pass]; /* pixel stride */
+               int offset_table[7] = {0, 4, 0, 2, 0, 1, 0}; /* first pixel, per pass */
+
+               pixel_bytes = (png_ptr->row_info.pixel_depth >> 3); /* bits -> bytes */
+               srcptr = png_ptr->row_buf + 1 + offset_table[png_ptr->pass]*
+                  pixel_bytes;
+               dstptr = row + offset_table[png_ptr->pass]*pixel_bytes;
+               initial_val = offset_table[png_ptr->pass]*pixel_bytes; /* byte offset */
+               final_val = png_ptr->width*pixel_bytes;   /* row length in bytes */
+               incr1 = (disp)*pixel_bytes;               /* byte stride */
+               for (i = initial_val; i < final_val; i += incr1)
+               {
+                  png_memcpy(dstptr, srcptr, pixel_bytes);  /* copy one pixel */
+                  srcptr += incr1;
+                  dstptr += incr1;
+               }
+            } /* end of else */
+
+            break;
+         }       // end 16 bpp
+
          case 48:
          {
             png_bytep srcptr;
@@ -927,9 +930,11 @@
                   jnc        skip48            //if CF = 0
                   mov        eax,[esi]
                   mov        [ebx],eax
+                  mov        ax,[esi+4]       // These 2 lines added 20070717
+                  mov        [ebx+4],ax       // Glenn R-P
 skip48:
-                  add        esi,4
-                  add        ebx,4
+                  add        esi,6            // Changed 4 to 6 on these 2
+                  add        ebx,6            // lines.  Glenn R-P 20070717
 
                   dec        ecx
                   jnz        secondloop48
@@ -1005,7 +1010,7 @@
    int pass = png_ptr->pass;
    png_uint_32 transformations = png_ptr->transformations;
 #ifdef PNG_USE_LOCAL_ARRAYS
-   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
 #endif
 
    png_debug(1,"in png_do_read_interlace\n");
@@ -1224,70 +1229,7 @@
             {
                if (pixel_bytes == 3)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
-                  {
-                     _asm
-                     {
-                        mov esi, sptr
-                        mov edi, dp
-                        mov ecx, width
-                        sub edi, 21   // (png_pass_inc[pass] - 1)*pixel_bytes
-loop_pass0:
-                        movd mm0, [esi]     ; X X X X X v2 v1 v0
-                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
-                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
-                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
-                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
-                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
-                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
-                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
-                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
-                        movq mm3, mm0       ; v2 v1 v0 v2 v1 v0 v2 v1
-                        psllq mm0, 16       ; v0 v2 v1 v0 v2 v1 0 0
-                        movq mm4, mm3       ; v2 v1 v0 v2 v1 v0 v2 v1
-                        punpckhdq mm3, mm0  ; v0 v2 v1 v0 v2 v1 v0 v2
-                        movq [edi+16] , mm4
-                        psrlq mm0, 32       ; 0 0 0 0 v0 v2 v1 v0
-                        movq [edi+8] , mm3
-                        punpckldq mm0, mm4  ; v1 v0 v2 v1 v0 v2 v1 v0
-                        sub esi, 3
-                        movq [edi], mm0
-                        sub edi, 24
-                        //sub esi, 3
-                        dec ecx
-                        jnz loop_pass0
-                        EMMS
-                     }
-                  }
-                  else if (((pass == 2) || (pass == 3)) && width)
-                  {
-                     _asm
-                     {
-                        mov esi, sptr
-                        mov edi, dp
-                        mov ecx, width
-                        sub edi, 9   // (png_pass_inc[pass] - 1)*pixel_bytes
-loop_pass2:
-                        movd mm0, [esi]     ; X X X X X v2 v1 v0
-                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
-                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
-                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
-                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
-                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
-                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
-                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
-                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
-                        movq [edi+4], mm0   ; move to memory
-                        psrlq mm0, 16       ; 0 0 v2 v1 v0 v2 v1 v0
-                        movd [edi], mm0     ; move to memory
-                        sub esi, 3
-                        sub edi, 12
-                        dec ecx
-                        jnz loop_pass2
-                        EMMS
-                     }
-                  }
-                  else if (width) /* && ((pass == 4) || (pass == 5)) */
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
                      int width_mmx = ((width >> 1) << 1) - 8;
                      if (width_mmx < 0)
@@ -1341,11 +1283,159 @@
                         sptr -= 3;
                      }
                   }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     _asm
+                     {
+                        mov esi, sptr
+                        mov edi, dp
+                        mov ecx, width
+                        sub edi, 9   // (png_pass_inc[pass] - 1)*pixel_bytes
+loop_pass2:
+                        movd mm0, [esi]     ; X X X X X v2 v1 v0
+                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
+                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
+                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
+                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
+                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
+                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
+                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
+                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
+                        movq [edi+4], mm0   ; move to memory
+                        psrlq mm0, 16       ; 0 0 v2 v1 v0 v2 v1 v0
+                        movd [edi], mm0     ; move to memory
+                        sub esi, 3
+                        sub edi, 12
+                        dec ecx
+                        jnz loop_pass2
+                        EMMS
+                     }
+                  }
+                  else if (width) /* && ((pass == 0) || (pass == 1))) */
+                  {
+                     _asm
+                     {
+                        mov esi, sptr
+                        mov edi, dp
+                        mov ecx, width
+                        sub edi, 21   // (png_pass_inc[pass] - 1)*pixel_bytes
+loop_pass0:
+                        movd mm0, [esi]     ; X X X X X v2 v1 v0
+                        pand mm0, const4    ; 0 0 0 0 0 v2 v1 v0
+                        movq mm1, mm0       ; 0 0 0 0 0 v2 v1 v0
+                        psllq mm0, 16       ; 0 0 0 v2 v1 v0 0 0
+                        movq mm2, mm0       ; 0 0 0 v2 v1 v0 0 0
+                        psllq mm0, 24       ; v2 v1 v0 0 0 0 0 0
+                        psrlq mm1, 8        ; 0 0 0 0 0 0 v2 v1
+                        por mm0, mm2        ; v2 v1 v0 v2 v1 v0 0 0
+                        por mm0, mm1        ; v2 v1 v0 v2 v1 v0 v2 v1
+                        movq mm3, mm0       ; v2 v1 v0 v2 v1 v0 v2 v1
+                        psllq mm0, 16       ; v0 v2 v1 v0 v2 v1 0 0
+                        movq mm4, mm3       ; v2 v1 v0 v2 v1 v0 v2 v1
+                        punpckhdq mm3, mm0  ; v0 v2 v1 v0 v2 v1 v0 v2
+                        movq [edi+16] , mm4
+                        psrlq mm0, 32       ; 0 0 0 0 v0 v2 v1 v0
+                        movq [edi+8] , mm3
+                        punpckldq mm0, mm4  ; v1 v0 v2 v1 v0 v2 v1 v0
+                        sub esi, 3
+                        movq [edi], mm0
+                        sub edi, 24
+                        //sub esi, 3
+                        dec ecx
+                        jnz loop_pass0
+                        EMMS
+                     }
+                  }
                } /* end of pixel_bytes == 3 */
 
                else if (pixel_bytes == 1)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
+                  {
+                     int width_mmx = ((width >> 3) << 3);
+                     width -= width_mmx;
+                     if (width_mmx)
+                     {
+                        _asm
+                        {
+                           mov esi, sptr
+                           mov edi, dp
+                           mov ecx, width_mmx
+                           sub edi, 15
+                           sub esi, 7
+loop1_pass4:
+                           movq mm0, [esi]     ; v0 v1 v2 v3 v4 v5 v6 v7
+                           movq mm1, mm0       ; v0 v1 v2 v3 v4 v5 v6 v7
+                           punpcklbw mm0, mm0  ; v4 v4 v5 v5 v6 v6 v7 v7
+                           //movq mm1, mm0     ; v0 v0 v1 v1 v2 v2 v3 v3
+                           punpckhbw mm1, mm1  ;v0 v0 v1 v1 v2 v2 v3 v3
+                           movq [edi+8], mm1   ; move to memory v0 v1 v2 and v3
+                           sub esi, 8
+                           movq [edi], mm0     ; move to memory v4 v5 v6 and v7
+                           //sub esi, 4
+                           sub edi, 16
+                           sub ecx, 8
+                           jnz loop1_pass4
+                           EMMS
+                        }
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*2;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           *dp-- = *sptr;
+                        }
+                        sptr --;
+                     }
+                  }
+                  else if (((pass == 2) || (pass == 3)) && width)
+                  {
+                     int width_mmx = ((width >> 2) << 2);
+                     width -= width_mmx;
+                     if (width_mmx)
+                     {
+                        _asm
+                        {
+                           mov esi, sptr
+                           mov edi, dp
+                           mov ecx, width_mmx
+                           sub edi, 15
+                           sub esi, 3
+loop1_pass2:
+                           movd mm0, [esi]     ; X X X X v0 v1 v2 v3
+                           punpcklbw mm0, mm0  ; v0 v0 v1 v1 v2 v2 v3 v3
+                           movq mm1, mm0       ; v0 v0 v1 v1 v2 v2 v3 v3
+                           punpcklwd mm0, mm0  ; v2 v2 v2 v2 v3 v3 v3 v3
+                           punpckhwd mm1, mm1  ; v0 v0 v0 v0 v1 v1 v1 v1
+                           movq [edi], mm0     ; move to memory v2 and v3
+                           sub esi, 4
+                           movq [edi+8], mm1   ; move to memory v1     and v0
+                           sub edi, 16
+                           sub ecx, 4
+                           jnz loop1_pass2
+                           EMMS
+                        }
+                     }
+
+                     sptr -= width_mmx;
+                     dp -= width_mmx*4;
+                     for (i = width; i; i--)
+                     {
+                        int j;
+
+                        for (j = 0; j < png_pass_inc[pass]; j++)
+                        {
+                           *dp-- = *sptr;
+                        }
+                        sptr --;
+                     }
+                  }
+                  else if (width) /* && ((pass == 0) || (pass == 1))) */
                   {
                      int width_mmx = ((width >> 2) << 2);
                      width -= width_mmx;
@@ -1412,98 +1502,13 @@
                         sptr--;
                      }
                   }
-                  else if (((pass == 2) || (pass == 3)) && width)
-                  {
-                     int width_mmx = ((width >> 2) << 2);
-                     width -= width_mmx;
-                     if (width_mmx)
-                     {
-                        _asm
-                        {
-                           mov esi, sptr
-                           mov edi, dp
-                           mov ecx, width_mmx
-                           sub edi, 15
-                           sub esi, 3
-loop1_pass2:
-                           movd mm0, [esi]     ; X X X X v0 v1 v2 v3
-                           punpcklbw mm0, mm0  ; v0 v0 v1 v1 v2 v2 v3 v3
-                           movq mm1, mm0       ; v0 v0 v1 v1 v2 v2 v3 v3
-                           punpcklwd mm0, mm0  ; v2 v2 v2 v2 v3 v3 v3 v3
-                           punpckhwd mm1, mm1  ; v0 v0 v0 v0 v1 v1 v1 v1
-                           movq [edi], mm0     ; move to memory v2 and v3
-                           sub esi, 4
-                           movq [edi+8], mm1   ; move to memory v1     and v0
-                           sub edi, 16
-                           sub ecx, 4
-                           jnz loop1_pass2
-                           EMMS
-                        }
-                     }
-
-                     sptr -= width_mmx;
-                     dp -= width_mmx*4;
-                     for (i = width; i; i--)
-                     {
-                        int j;
-
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           *dp-- = *sptr;
-                        }
-                        sptr --;
-                     }
-                  }
-                  else if (width) /* && ((pass == 4) || (pass == 5))) */
-                  {
-                     int width_mmx = ((width >> 3) << 3);
-                     width -= width_mmx;
-                     if (width_mmx)
-                     {
-                        _asm
-                        {
-                           mov esi, sptr
-                           mov edi, dp
-                           mov ecx, width_mmx
-                           sub edi, 15
-                           sub esi, 7
-loop1_pass4:
-                           movq mm0, [esi]     ; v0 v1 v2 v3 v4 v5 v6 v7
-                           movq mm1, mm0       ; v0 v1 v2 v3 v4 v5 v6 v7
-                           punpcklbw mm0, mm0  ; v4 v4 v5 v5 v6 v6 v7 v7
-                           //movq mm1, mm0     ; v0 v0 v1 v1 v2 v2 v3 v3
-                           punpckhbw mm1, mm1  ;v0 v0 v1 v1 v2 v2 v3 v3
-                           movq [edi+8], mm1   ; move to memory v0 v1 v2 and v3
-                           sub esi, 8
-                           movq [edi], mm0     ; move to memory v4 v5 v6 and v7
-                           //sub esi, 4
-                           sub edi, 16
-                           sub ecx, 8
-                           jnz loop1_pass4
-                           EMMS
-                        }
-                     }
-
-                     sptr -= width_mmx;
-                     dp -= width_mmx*2;
-                     for (i = width; i; i--)
-                     {
-                        int j;
-
-                        for (j = 0; j < png_pass_inc[pass]; j++)
-                        {
-                           *dp-- = *sptr;
-                        }
-                        sptr --;
-                     }
-                  }
                } /* end of pixel_bytes == 1 */
 
                else if (pixel_bytes == 2)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
-                     int width_mmx = ((width >> 1) << 1);
+                     int width_mmx = ((width >> 1) << 1) ;
                      width -= width_mmx;
                      if (width_mmx)
                      {
@@ -1513,27 +1518,21 @@
                            mov edi, dp
                            mov ecx, width_mmx
                            sub esi, 2
-                           sub edi, 30
-loop2_pass0:
+                           sub edi, 6
+loop2_pass4:
                            movd mm0, [esi]        ; X X X X v1 v0 v3 v2
                            punpcklwd mm0, mm0     ; v1 v0 v1 v0 v3 v2 v3 v2
-                           movq mm1, mm0          ; v1 v0 v1 v0 v3 v2 v3 v2
-                           punpckldq mm0, mm0     ; v3 v2 v3 v2 v3 v2 v3 v2
-                           punpckhdq mm1, mm1     ; v1 v0 v1 v0 v1 v0 v1 v0
-                           movq [edi], mm0
-                           movq [edi + 8], mm0
-                           movq [edi + 16], mm1
-                           movq [edi + 24], mm1
                            sub esi, 4
-                           sub edi, 32
+                           movq [edi], mm0
+                           sub edi, 8
                            sub ecx, 2
-                           jnz loop2_pass0
+                           jnz loop2_pass4
                            EMMS
                         }
                      }
 
                      sptr -= (width_mmx*2 - 2);            // sign fixed
-                     dp -= (width_mmx*16 - 2);            // sign fixed
+                     dp -= (width_mmx*4 - 2);            // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
@@ -1592,9 +1591,9 @@
                         }
                      }
                   }
-                  else if (width)  // pass == 4 or 5
+                  else if (width) /* && ((pass == 0) || (pass == 1))) */
                   {
-                     int width_mmx = ((width >> 1) << 1) ;
+                     int width_mmx = ((width >> 1) << 1);
                      width -= width_mmx;
                      if (width_mmx)
                      {
@@ -1604,21 +1603,27 @@
                            mov edi, dp
                            mov ecx, width_mmx
                            sub esi, 2
-                           sub edi, 6
-loop2_pass4:
+                           sub edi, 30
+loop2_pass0:
                            movd mm0, [esi]        ; X X X X v1 v0 v3 v2
                            punpcklwd mm0, mm0     ; v1 v0 v1 v0 v3 v2 v3 v2
-                           sub esi, 4
+                           movq mm1, mm0          ; v1 v0 v1 v0 v3 v2 v3 v2
+                           punpckldq mm0, mm0     ; v3 v2 v3 v2 v3 v2 v3 v2
+                           punpckhdq mm1, mm1     ; v1 v0 v1 v0 v1 v0 v1 v0
                            movq [edi], mm0
-                           sub edi, 8
+                           movq [edi + 8], mm0
+                           movq [edi + 16], mm1
+                           movq [edi + 24], mm1
+                           sub esi, 4
+                           sub edi, 32
                            sub ecx, 2
-                           jnz loop2_pass4
+                           jnz loop2_pass0
                            EMMS
                         }
                      }
 
                      sptr -= (width_mmx*2 - 2);            // sign fixed
-                     dp -= (width_mmx*4 - 2);            // sign fixed
+                     dp -= (width_mmx*16 - 2);            // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
@@ -1636,7 +1641,7 @@
 
                else if (pixel_bytes == 4)
                {
-                  if (((pass == 0) || (pass == 1)) && width)
+                  if (((pass == 4) || (pass == 5)) && width)
                   {
                      int width_mmx = ((width >> 1) << 1) ;
                      width -= width_mmx;
@@ -1648,30 +1653,24 @@
                            mov edi, dp
                            mov ecx, width_mmx
                            sub esi, 4
-                           sub edi, 60
-loop4_pass0:
-                           movq mm0, [esi]        ; v3 v2 v1 v0 v7 v6 v5 v4
-                           movq mm1, mm0          ; v3 v2 v1 v0 v7 v6 v5 v4
-                           punpckldq mm0, mm0     ; v7 v6 v5 v4 v7 v6 v5 v4
-                           punpckhdq mm1, mm1     ; v3 v2 v1 v0 v3 v2 v1 v0
+                           sub edi, 12
+loop4_pass4:
+                           movq mm0, [esi]      ; v3 v2 v1 v0 v7 v6 v5 v4
+                           movq mm1, mm0        ; v3 v2 v1 v0 v7 v6 v5 v4
+                           punpckldq mm0, mm0   ; v7 v6 v5 v4 v7 v6 v5 v4
+                           punpckhdq mm1, mm1   ; v3 v2 v1 v0 v3 v2 v1 v0
                            movq [edi], mm0
-                           movq [edi + 8], mm0
-                           movq [edi + 16], mm0
-                           movq [edi + 24], mm0
-                           movq [edi+32], mm1
-                           movq [edi + 40], mm1
-                           movq [edi+ 48], mm1
                            sub esi, 8
-                           movq [edi + 56], mm1
-                           sub edi, 64
+                           movq [edi + 8], mm1
+                           sub edi, 16
                            sub ecx, 2
-                           jnz loop4_pass0
+                           jnz loop4_pass4
                            EMMS
                         }
                      }
 
-                     sptr -= (width_mmx*4 - 4);            // sign fixed
-                     dp -= (width_mmx*32 - 4);            // sign fixed
+                     sptr -= (width_mmx*4 - 4);          // sign fixed
+                     dp -= (width_mmx*8 - 4);            // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
@@ -1730,7 +1729,7 @@
                         }
                      }
                   }
-                  else if (width)  // pass == 4 or 5
+                  else if (width) /* && ((pass == 0) || (pass == 1))) */
                   {
                      int width_mmx = ((width >> 1) << 1) ;
                      width -= width_mmx;
@@ -1742,24 +1741,30 @@
                            mov edi, dp
                            mov ecx, width_mmx
                            sub esi, 4
-                           sub edi, 12
-loop4_pass4:
-                           movq mm0, [esi]      ; v3 v2 v1 v0 v7 v6 v5 v4
-                           movq mm1, mm0        ; v3 v2 v1 v0 v7 v6 v5 v4
-                           punpckldq mm0, mm0   ; v7 v6 v5 v4 v7 v6 v5 v4
-                           punpckhdq mm1, mm1   ; v3 v2 v1 v0 v3 v2 v1 v0
+                           sub edi, 60
+loop4_pass0:
+                           movq mm0, [esi]        ; v3 v2 v1 v0 v7 v6 v5 v4
+                           movq mm1, mm0          ; v3 v2 v1 v0 v7 v6 v5 v4
+                           punpckldq mm0, mm0     ; v7 v6 v5 v4 v7 v6 v5 v4
+                           punpckhdq mm1, mm1     ; v3 v2 v1 v0 v3 v2 v1 v0
                            movq [edi], mm0
+                           movq [edi + 8], mm0
+                           movq [edi + 16], mm0
+                           movq [edi + 24], mm0
+                           movq [edi+32], mm1
+                           movq [edi + 40], mm1
+                           movq [edi+ 48], mm1
                            sub esi, 8
-                           movq [edi + 8], mm1
-                           sub edi, 16
+                           movq [edi + 56], mm1
+                           sub edi, 64
                            sub ecx, 2
-                           jnz loop4_pass4
+                           jnz loop4_pass0
                            EMMS
                         }
                      }
 
-                     sptr -= (width_mmx*4 - 4);          // sign fixed
-                     dp -= (width_mmx*8 - 4);            // sign fixed
+                     sptr -= (width_mmx*4 - 4);            // sign fixed
+                     dp -= (width_mmx*32 - 4);            // sign fixed
                      for (i = width; i; i--)
                      {
                         png_byte v[8];
@@ -3693,32 +3698,32 @@
    png_debug(1, "in png_read_filter_row\n");
    switch (filter)
    {
-      case 0: sprintf(filnm, "none");
+      case 0: png_snprintf(filnm, 10, "none");
          break;
 #if !defined(PNG_1_0_X)
-      case 1: sprintf(filnm, "sub-%s",
+      case 1: png_snprintf(filnm, 10, "sub-%s",
         (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB)? "MMX" : "x86");
          break;
-      case 2: sprintf(filnm, "up-%s",
+      case 2: png_snprintf(filnm, 10, "up-%s",
         (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP)? "MMX" : "x86");
          break;
-      case 3: sprintf(filnm, "avg-%s",
+      case 3: png_snprintf(filnm, 10, "avg-%s",
         (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG)? "MMX" : "x86");
          break;
-      case 4: sprintf(filnm, "Paeth-%s",
+      case 4: png_snprintf(filnm, 10, "Paeth-%s",
         (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH)? "MMX":"x86");
          break;
 #else
-      case 1: sprintf(filnm, "sub");
+      case 1: png_snprintf(filnm, 10, "sub");
          break;
-      case 2: sprintf(filnm, "up");
+      case 2: png_snprintf(filnm, 10, "up");
          break;
-      case 3: sprintf(filnm, "avg");
+      case 3: png_snprintf(filnm, 10, "avg");
          break;
-      case 4: sprintf(filnm, "Paeth");
+      case 4: png_snprintf(filnm, 10, "Paeth");
          break;
 #endif
-      default: sprintf(filnm, "unknw");
+      default: png_snprintf(filnm, 10, "unknw");
          break;
    }
    png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm);
@@ -3901,4 +3906,4 @@
    }
 }
 
-#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGVCRD */
+#endif /* PNG_MMX_CODE_SUPPORTED && PNG_USE_PNGVCRD */
diff --git a/pngwio.c b/pngwio.c
index 4d9d57e..371a4fa 100644
--- a/pngwio.c
+++ b/pngwio.c
@@ -3,7 +3,7 @@
  *
  * Last changed in libpng 1.2.13 November 13, 2006
  * For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2002 Glenn Randers-Pehrson
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
  *
diff --git a/pngwrite.c b/pngwrite.c
index df7ade3..b6dccf7 100644
--- a/pngwrite.c
+++ b/pngwrite.c
@@ -517,11 +517,13 @@
         char msg[80];
         if (user_png_ver)
         {
-          sprintf(msg, "Application was compiled with png.h from libpng-%.20s",
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
              user_png_ver);
           png_warning(png_ptr, msg);
         }
-        sprintf(msg, "Application  is  running with png.c from libpng-%.20s",
+        png_snprintf(msg, 80,
+           "Application  is  running with png.c from libpng-%.20s",
            png_libpng_ver);
         png_warning(png_ptr, msg);
 #endif
@@ -587,11 +589,13 @@
       png_ptr->warning_fn=NULL;
       if (user_png_ver)
       {
-        sprintf(msg, "Application was compiled with png.h from libpng-%.20s",
+        png_snprintf(msg, 80,
+           "Application was compiled with png.h from libpng-%.20s",
            user_png_ver);
         png_warning(png_ptr, msg);
       }
-      sprintf(msg, "Application  is  running with png.c from libpng-%.20s",
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
          png_libpng_ver);
       png_warning(png_ptr, msg);
    }
@@ -1137,15 +1141,26 @@
    {
       switch (filters & (PNG_ALL_FILTERS | 0x07))
       {
+#ifndef PNG_NO_WRITE_FILTER
          case 5:
          case 6:
          case 7: png_warning(png_ptr, "Unknown row filter for method 0");
-         case PNG_FILTER_VALUE_NONE:  png_ptr->do_filter=PNG_FILTER_NONE; break;
-         case PNG_FILTER_VALUE_SUB:   png_ptr->do_filter=PNG_FILTER_SUB;  break;
-         case PNG_FILTER_VALUE_UP:    png_ptr->do_filter=PNG_FILTER_UP;   break;
-         case PNG_FILTER_VALUE_AVG:   png_ptr->do_filter=PNG_FILTER_AVG;  break;
-         case PNG_FILTER_VALUE_PAETH: png_ptr->do_filter=PNG_FILTER_PAETH;break;
+#endif /* PNG_NO_WRITE_FILTER */
+         case PNG_FILTER_VALUE_NONE:
+              png_ptr->do_filter=PNG_FILTER_NONE; break;
+#ifndef PNG_NO_WRITE_FILTER
+         case PNG_FILTER_VALUE_SUB:
+              png_ptr->do_filter=PNG_FILTER_SUB; break;
+         case PNG_FILTER_VALUE_UP:
+              png_ptr->do_filter=PNG_FILTER_UP; break;
+         case PNG_FILTER_VALUE_AVG:
+              png_ptr->do_filter=PNG_FILTER_AVG; break;
+         case PNG_FILTER_VALUE_PAETH:
+              png_ptr->do_filter=PNG_FILTER_PAETH; break;
          default: png_ptr->do_filter = (png_byte)filters; break;
+#else
+         default: png_warning(png_ptr, "Unknown row filter for method 0");
+#endif /* PNG_NO_WRITE_FILTER */
       }
 
       /* If we have allocated the row_buf, this means we have already started
@@ -1159,6 +1174,7 @@
        */
       if (png_ptr->row_buf != NULL)
       {
+#ifndef PNG_NO_WRITE_FILTER
          if ((png_ptr->do_filter & PNG_FILTER_SUB) && png_ptr->sub_row == NULL)
          {
             png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
@@ -1213,6 +1229,7 @@
          }
 
          if (png_ptr->do_filter == PNG_NO_FILTERS)
+#endif /* PNG_NO_WRITE_FILTER */
             png_ptr->do_filter = PNG_FILTER_NONE;
       }
    }
@@ -1511,8 +1528,8 @@
    /* It is REQUIRED to call this to finish writing the rest of the file */
    png_write_end(png_ptr, info_ptr);
 
-   if(transforms == 0 || params == NULL)
-      /* quiet compiler warnings */ return;
+   transforms = transforms; /* quiet compiler warnings */
+   params = params;
 }
 #endif
 #endif /* PNG_WRITE_SUPPORTED */
diff --git a/pngwutil.c b/pngwutil.c
index 314bea2..52f8c12 100644
--- a/pngwutil.c
+++ b/pngwutil.c
@@ -1,7 +1,7 @@
 
 /* pngwutil.c - utilities to write a PNG file
  *
- * Last changed in libpng 1.2.15 January 5, 2007
+ * Last changed in libpng 1.2.19 July 31, 2007
  * For conditions of distribution and use, see copyright notice in png.h
  * Copyright (c) 1998-2007 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
@@ -182,7 +182,7 @@
    {
 #if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
       char msg[50];
-      sprintf(msg, "Unknown compression type %d", compression);
+      png_snprintf(msg, 50, "Unknown compression type %d", compression);
       png_warning(png_ptr, msg);
 #else
       png_warning(png_ptr, "Unknown compression type");
@@ -578,7 +578,7 @@
    png_ptr->num_palette = (png_uint_16)num_pal;
    png_debug1(3, "num_palette = %d\n", png_ptr->num_palette);
 
-   png_write_chunk_start(png_ptr, (png_bytep)png_PLTE, num_pal * 3);
+   png_write_chunk_start(png_ptr, png_PLTE, num_pal * 3);
 #ifndef PNG_NO_POINTER_INDEXING
    for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++)
    {
@@ -783,7 +783,7 @@
           PNG_COMPRESSION_TYPE_BASE, &comp);
 
    /* make sure we include the NULL after the name and the compression type */
-   png_write_chunk_start(png_ptr, (png_bytep)png_iCCP,
+   png_write_chunk_start(png_ptr, png_iCCP,
           (png_uint_32)name_len+profile_len+2);
    new_name[name_len+1]=0x00;
    png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 2);
@@ -823,7 +823,7 @@
    }
 
    /* make sure we include the NULL after the name */
-   png_write_chunk_start(png_ptr, (png_bytep)png_sPLT,
+   png_write_chunk_start(png_ptr, png_sPLT,
           (png_uint_32)(name_len + 2 + palette_size));
    png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 1);
    png_write_chunk_data(png_ptr, (png_bytep)&spalette->depth, 1);
@@ -1180,7 +1180,7 @@
       return;
    }
 
-   png_write_chunk_start(png_ptr, (png_bytep)png_hIST, (png_uint_32)(num_hist * 2));
+   png_write_chunk_start(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
    for (i = 0; i < num_hist; i++)
    {
       png_save_uint_16(buf, hist[i]);
@@ -1231,12 +1231,14 @@
    /* Replace non-printing characters with a blank and print a warning */
    for (kp = key, dp = *new_key; *kp != '\0'; kp++, dp++)
    {
-      if (*kp < 0x20 || (*kp > 0x7E && (png_byte)*kp < 0xA1))
+      if ((png_byte)*kp < 0x20 ||
+         ((png_byte)*kp > 0x7E && (png_byte)*kp < 0xA1))
       {
 #if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
          char msg[40];
 
-         sprintf(msg, "invalid keyword character 0x%02X", *kp);
+         png_snprintf(msg, 40,
+           "invalid keyword character 0x%02X", (png_byte)*kp);
          png_warning(png_ptr, msg);
 #else
          png_warning(png_ptr, "invalid character in keyword");
@@ -1397,8 +1399,6 @@
 
    text_len = png_strlen(text);
 
-   png_free(png_ptr, new_key);
-
    /* compute the compressed data; do it now for the length */
    text_len = png_text_compress(png_ptr, text, text_len, compression,
        &comp);
@@ -1407,7 +1407,9 @@
    png_write_chunk_start(png_ptr, (png_bytep)png_zTXt, (png_uint_32)
       (key_len+text_len+2));
    /* write key */
-   png_write_chunk_data(png_ptr, (png_bytep)key, key_len + 1);
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+   png_free(png_ptr, new_key);
+
    buf[0] = (png_byte)compression;
    /* write compression */
    png_write_chunk_data(png_ptr, (png_bytep)buf, (png_size_t)1);
@@ -1527,10 +1529,9 @@
    png_save_int_32(buf + 4, y_offset);
    buf[8] = (png_byte)unit_type;
 
-   png_write_chunk(png_ptr, (png_bytep)png_oFFs, buf, (png_size_t)9);
+   png_write_chunk(png_ptr, png_oFFs, buf, (png_size_t)9);
 }
 #endif
-
 #if defined(PNG_WRITE_pCAL_SUPPORTED)
 /* write the pCAL chunk (described in the PNG extensions document) */
 void /* PRIVATE */
@@ -1569,7 +1570,7 @@
    }
 
    png_debug1(3, "pCAL total length = %d\n", (int)total_len);
-   png_write_chunk_start(png_ptr, (png_bytep)png_pCAL, (png_uint_32)total_len);
+   png_write_chunk_start(png_ptr, png_pCAL, (png_uint_32)total_len);
    png_write_chunk_data(png_ptr, (png_bytep)new_purpose, purpose_len);
    png_save_int_32(buf, X0);
    png_save_int_32(buf + 4, X1);
@@ -1622,14 +1623,14 @@
       total_len += wc_len;
    }
 #else
-   sprintf(buf + 1, "%12.12e", width);
+   png_snprintf(buf + 1, 63, "%12.12e", width);
    total_len = 1 + png_strlen(buf + 1) + 1;
-   sprintf(buf + total_len, "%12.12e", height);
+   png_snprintf(buf + total_len, 64-total_len, "%12.12e", height);
    total_len += png_strlen(buf + total_len);
 #endif
 
    png_debug1(3, "sCAL total length = %u\n", (unsigned int)total_len);
-   png_write_chunk(png_ptr, (png_bytep)png_sCAL, (png_bytep)buf, total_len);
+   png_write_chunk(png_ptr, png_sCAL, (png_bytep)buf, total_len);
 }
 #else
 #ifdef PNG_FIXED_POINT_SUPPORTED
@@ -1659,7 +1660,7 @@
    png_memcpy(buf + wlen + 2, height, hlen);  /* do NOT append the '\0' here */
 
    png_debug1(3, "sCAL total length = %u\n", (unsigned int)total_len);
-   png_write_chunk(png_ptr, (png_bytep)png_sCAL, buf, total_len);
+   png_write_chunk(png_ptr, png_sCAL, buf, total_len);
 }
 #endif
 #endif
@@ -1685,7 +1686,7 @@
    png_save_uint_32(buf + 4, y_pixels_per_unit);
    buf[8] = (png_byte)unit_type;
 
-   png_write_chunk(png_ptr, (png_bytep)png_pHYs, buf, (png_size_t)9);
+   png_write_chunk(png_ptr, png_pHYs, buf, (png_size_t)9);
 }
 #endif
 
@@ -1717,7 +1718,7 @@
    buf[5] = mod_time->minute;
    buf[6] = mod_time->second;
 
-   png_write_chunk(png_ptr, (png_bytep)png_tIME, buf, (png_size_t)7);
+   png_write_chunk(png_ptr, png_tIME, buf, (png_size_t)7);
 }
 #endif
 
@@ -1751,6 +1752,7 @@
    png_ptr->row_buf = (png_bytep)png_malloc(png_ptr, (png_uint_32)buf_size);
    png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
 
+#ifndef PNG_NO_WRITE_FILTER
    /* set up filtering buffer, if using this filter */
    if (png_ptr->do_filter & PNG_FILTER_SUB)
    {
@@ -1768,7 +1770,7 @@
 
       if (png_ptr->do_filter & PNG_FILTER_UP)
       {
-         png_ptr->up_row = (png_bytep )png_malloc(png_ptr,
+         png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
             (png_ptr->rowbytes + 1));
          png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
       }
@@ -1782,10 +1784,11 @@
 
       if (png_ptr->do_filter & PNG_FILTER_PAETH)
       {
-         png_ptr->paeth_row = (png_bytep )png_malloc(png_ptr,
+         png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
             (png_ptr->rowbytes + 1));
          png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
       }
    }
+#endif /* PNG_NO_WRITE_FILTER */
 
 #ifdef PNG_WRITE_INTERLACING_SUPPORTED
@@ -2121,6 +2124,7 @@
 
    prev_row = png_ptr->prev_row;
    best_row = row_buf = png_ptr->row_buf;
+#ifndef PNG_NO_WRITE_FILTER
    mins = PNG_MAXSUM;
 
    /* The prediction method we use is to find which method provides the
@@ -2695,7 +2699,7 @@
          best_row = png_ptr->paeth_row;
       }
    }
-
+#endif /* PNG_NO_WRITE_FILTER */
    /* Do the actual writing of the filtered row data from the chosen filter. */
 
    png_write_filtered_row(png_ptr, best_row);
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
index 7f10f11..7380b57 100644
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@@ -5,7 +5,7 @@
 
 set(PNGLIB_MAJOR 1)
 set(PNGLIB_MINOR 0)
-set(PNGLIB_RELEASE 26)
+set(PNGLIB_RELEASE 27)
 set(PNGLIB_NAME libpng${PNGLIB_MAJOR}${PNGLIB_MINOR})
 set(PNGLIB_VERSION ${PNGLIB_MAJOR}.${PNGLIB_MINOR}.${PNGLIB_RELEASE})
 
@@ -49,6 +49,12 @@
       set(png_asm_tmp "OFF")
    endif("uname_output" MATCHES "^.*i[1-9]86.*$")
  endif(uname_executable)
+else(NOT WIN32)
+ # this env var is normally only set on win64
+ SET(TEXT "ProgramFiles(x86)")
+ if("$ENV{${TEXT}}" STREQUAL "")
+  set(png_asm_tmp "ON")
+ endif("$ENV{${TEXT}}" STREQUAL "")
 endif(NOT WIN32)
 
 option(PNG_MMX "Use MMX assembler code (x86 only)" ${png_asm_tmp})
@@ -90,31 +96,21 @@
 # SOME NEEDED DEFINITIONS
 add_definitions(-DZLIB_DLL)
 
-if(MSVC)
- add_definitions(-DPNG_USE_PNGVCRD -DPNG_NO_MODULEDEF
-   -D_CRT_SECURE_NO_DEPRECATE)
- set(libpng_sources ${libpng_sources}
-         pngvcrd.c
- )
-else(MSVC)
- add_definitions(-DPNG_USE_PNGGCCRD)
- set(libpng_sources ${libpng_sources}
-         pnggccrd.c
- )
-endif(MSVC)
-
-if(NOT MSVC)
- if(NOT PNG_MMX)
-   add_definitions(-DLIBPNG_NO_MMX)
-   add_definitions(-DPNG_NO_MMX_CODE)
- endif(NOT PNG_MMX)
-else(NOT MSVC)
- if(PNG_MMX)
- # maybe add this to pngconf.h ?
- add_definitions(-DPNG_MMX_CODE_SUPPORTED)
- endif(PNG_MMX)
-endif(NOT MSVC)
-
+if(PNG_MMX)
+ if(MSVC)
+  add_definitions(-DPNG_NO_MODULEDEF -D_CRT_SECURE_NO_DEPRECATE)
+  set(libpng_sources ${libpng_sources}
+          pngvcrd.c
+  )
+ else(MSVC)
+  set(libpng_sources ${libpng_sources}
+          pnggccrd.c
+  )
+ endif(MSVC)
+else(PNG_MMX)
+  add_definitions(-DLIBPNG_NO_MMX)
+  add_definitions(-DPNG_NO_MMX_CODE)
+endif(PNG_MMX)
 
 if(PNG_CONSOLE_IO_SUPPORTED)
  add_definitions(-DPNG_CONSOLE_IO_SUPPORTED)
@@ -132,9 +128,9 @@
  add_definitions(-DPNG_DEBUG)
 endif(PNG_DEBUG)
 
-if(NOT M_LIBRARY)
+if(NOT M_LIBRARY AND NOT WIN32)
  add_definitions(-DPNG_NO_FLOATING_POINT_SUPPORTED)
-endif(NOT M_LIBRARY)
+endif(NOT M_LIBRARY AND NOT WIN32)
 
 # NOW BUILD OUR TARGET
 include_directories(${PNG_SOURCE_DIR} ${ZLIB_INCLUDE_DIR})
@@ -179,7 +175,7 @@
 
 # SET UP LINKS
 set_target_properties(${PNG_LIB_NAME} PROPERTIES
-#    VERSION 0.${PNGLIB_RELEASE}.1.0.26
+#    VERSION 0.${PNGLIB_RELEASE}.1.0.27rc1
      VERSION 0.${PNGLIB_RELEASE}.0
      SOVERSION 0
      CLEAN_DIRECT_OUTPUT 1)
diff --git a/scripts/libpng-config-head.in b/scripts/libpng-config-head.in
index 8631013..ea4d151 100755
--- a/scripts/libpng-config-head.in
+++ b/scripts/libpng-config-head.in
@@ -8,7 +8,7 @@
 
 # Modeled after libxml-config.
 
-version=1.0.26
+version=1.0.27rc1
 prefix=""
 libdir=""
 libs=""
diff --git a/scripts/libpng.pc-configure.in b/scripts/libpng.pc-configure.in
index 332ff84..106248f 100644
--- a/scripts/libpng.pc-configure.in
+++ b/scripts/libpng.pc-configure.in
@@ -5,6 +5,6 @@
 
 Name: libpng
 Description: Loads and saves PNG files
-Version: 1.0.26
+Version: 1.0.27rc1
 Libs: -L${libdir} -lpng10
 Cflags: -I${includedir} @LIBPNG_NO_MMX@
diff --git a/scripts/libpng.pc.in b/scripts/libpng.pc.in
index b007beb..3f29484 100644
--- a/scripts/libpng.pc.in
+++ b/scripts/libpng.pc.in
@@ -5,6 +5,6 @@
 
 Name: libpng
 Description: Loads and saves PNG files
-Version: 1.0.26
+Version: 1.0.27rc1
 Libs: -L${libdir} -lpng10
 Cflags: -I${includedir}
diff --git a/scripts/makefile.32sunu b/scripts/makefile.32sunu
index 8f15463..001aa38 100644
--- a/scripts/makefile.32sunu
+++ b/scripts/makefile.32sunu
@@ -8,7 +8,7 @@
 # Library name:
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
diff --git a/scripts/makefile.64sunu b/scripts/makefile.64sunu
index a2b04ed..495287e 100644
--- a/scripts/makefile.64sunu
+++ b/scripts/makefile.64sunu
@@ -8,7 +8,7 @@
 # Library name:
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
diff --git a/scripts/makefile.aix b/scripts/makefile.aix
index e4eba37..0e6a44d 100644
--- a/scripts/makefile.aix
+++ b/scripts/makefile.aix
@@ -20,7 +20,7 @@
 
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 prefix=/usr/local
diff --git a/scripts/makefile.beos b/scripts/makefile.beos
index 405f9ab..876bf71 100644
--- a/scripts/makefile.beos
+++ b/scripts/makefile.beos
@@ -8,7 +8,7 @@
 # Library name:
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
diff --git a/scripts/makefile.cygwin b/scripts/makefile.cygwin
index 10997b0..63aed9c 100644
--- a/scripts/makefile.cygwin
+++ b/scripts/makefile.cygwin
@@ -3,7 +3,7 @@
 #   of the library, and builds two copies of pngtest: one
 #   statically linked and one dynamically linked.
 #
-# Copyright (C) 2002, 2006 Soren Anderson, Charles Wilson,
+# Copyright (C) 2002, 2006, 2007 Soren Anderson, Charles Wilson,
 #    and Glenn Randers-Pehrson, based on makefile for linux-elf w/mmx by:
 # Copyright (C) 1998-2000 Greg Roelofs
 # Copyright (C) 1996, 1997 Andreas Dilger
@@ -31,9 +31,8 @@
 
 DESTDIR=
 
-# To enable assembler optimizations, add '-DPNG_USE_PNGGCCRD' to
-# $CFLAGS, and include pnggccrd.o in $OBJS, below, and in the dependency
-# list at the bottom of this makefile.
+# To disable assembler optimizations, add '-DPNG_NO_MMX_CODE' to
+# $CFLAGS.  To enable, add pnggccrd.o to the dependencies.
 
 CC=gcc
 ifdef MINGW
@@ -62,25 +61,23 @@
 	-Wmissing-declarations -Wtraditional -Wcast-align \
 	-Wstrict-prototypes -Wmissing-prototypes #-Wconversion
 
-### if you use the asm, add pnggccrd.o to the OBJS list
-###
 ### if you don't need thread safety, but want the asm accel
-#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD \
-#	$(addprefix -I,$(ZLIBINC)) -Wall -O3 $(ALIGN) -funroll-loops \
+#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_THREAD_UNSAFE_OK \
+#	$(addprefix -I,$(ZLIBINC)) -Wall -O $(ALIGN) -funroll-loops \
 #	-fomit-frame-pointer)  # $(WARNMORE) -g -DPNG_DEBUG=5
 ### if you need thread safety and want (minimal) asm accel
-#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_USE_PNGGCCRD $(addprefix -I,$(ZLIBINC)) \
-#	-Wall -O3 $(ALIGN) -funroll-loops \
+#CFLAGS= $(strip $(MINGW_CCFLAGS) $(addprefix -I,$(ZLIBINC)) \
+#	-Wall -O $(ALIGN) -funroll-loops \
 #	-fomit-frame-pointer)  # $(WARNMORE) -g -DPNG_DEBUG=5
 ### Normal (non-asm) compilation
 CFLAGS= $(strip $(MINGW_CCFLAGS) $(addprefix -I,$(ZLIBINC)) \
-        -Wall -O3 $(ALIGN) -funroll-loops \
+        -Wall -O3 $(ALIGN) -funroll-loops -DPNG_NO_MMX_CODE \
 	-fomit-frame-pointer) # $(WARNMORE) -g -DPNG_DEBUG=5
 
 LIBNAME = libpng10
 PNGMAJ = 0
 CYGDLL = 10
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 SHAREDLIB=cygpng$(CYGDLL).dll
@@ -159,19 +156,19 @@
 all-static: $(STATLIB) pngtest-stat$(EXE)
 all-shared: $(SHAREDLIB) pngtest$(EXE)
 
-pnggccrd.o: pnggccrd.c png.h pngconf.h
-	@echo ""
-	@echo '    You can ignore the "control reaches end of non-void function"'
-	@echo '    warning and "<variable> defined but not used" warnings:'
-	@echo ""
-	$(CC) -c $(CFLAGS) -o $@ $<
+# pnggccrd.o: pnggccrd.c png.h pngconf.h
+#	@echo ""
+#	@echo '    You can ignore the "control reaches end of non-void function"'
+#	@echo '    warning and "<variable> defined but not used" warnings:'
+#	@echo ""
+#	$(CC) -c $(CFLAGS) -o $@ $<
 
-pnggccrd.pic.o:	pnggccrd.c png.h pngconf.h
-	@echo ""
-	@echo '    You can ignore the "control reaches end of non-void function"'
-	@echo '    warning and "<variable> defined but not used" warnings:'
-	@echo ""
-	$(CC) -c $(CFLAGS) -DPNG_BUILD_DLL -o $@ $<
+# pnggccrd.pic.o:	pnggccrd.c png.h pngconf.h
+#	@echo ""
+#	@echo '    You can ignore the "control reaches end of non-void function"'
+#	@echo '    warning and "<variable> defined but not used" warnings:'
+#	@echo ""
+#	$(CC) -c $(CFLAGS) -DPNG_BUILD_DLL -o $@ $<
 
 $(STATLIB): $(OBJS)
 	ar rc $@ $(OBJS)
@@ -308,6 +305,7 @@
 pngwtran.o pngwtran.pic.o:	png.h pngconf.h pngwtran.c
 pngwutil.o pngwutil.pic.o:	png.h pngconf.h pngwutil.c
 pngpread.o pngpread.pic.o:	png.h pngconf.h pngpread.c
+# pnggccrd.o pnggccrd.pic.o:	png.h pngconf.h pnggccrd.c
 
 pngtest.o:			png.h pngconf.h pngtest.c
 pngtest-stat.o:			png.h pngconf.h pngtest.c
diff --git a/scripts/makefile.darwin b/scripts/makefile.darwin
index bf08a40..fa57646 100644
--- a/scripts/makefile.darwin
+++ b/scripts/makefile.darwin
@@ -19,7 +19,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -38,7 +38,8 @@
 RANLIB=ranlib
 RM_F=/bin/rm -f
 
-CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops
+# CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops -DPNG_NO_MMX_CODE
+CFLAGS=-I$(ZLIBINC) -Wall -O -funroll-loops
 LDFLAGS=-L. -L$(ZLIBLIB) -lpng10 -lz
 
 INCPATH=$(prefix)/include
diff --git a/scripts/makefile.dec b/scripts/makefile.dec
index ec3ded4..6046ba7 100644
--- a/scripts/makefile.dec
+++ b/scripts/makefile.dec
@@ -5,7 +5,7 @@
 
 # Library name:
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 LIBNAME = libpng10
 
diff --git a/scripts/makefile.elf b/scripts/makefile.elf
index 2d88173..03eb213 100644
--- a/scripts/makefile.elf
+++ b/scripts/makefile.elf
@@ -12,7 +12,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
diff --git a/scripts/makefile.freebsd b/scripts/makefile.freebsd
index 7821e38..59f3644 100644
--- a/scripts/makefile.freebsd
+++ b/scripts/makefile.freebsd
@@ -24,14 +24,14 @@
 LDADD+=         -lm -lz
 DPADD+=         ${LIBM} ${LIBZ}
 
-CFLAGS+= -I. -DPNG_USE_PNGGCCRD
+CFLAGS+= -I.
 .if (${MACHINE_ARCH} != "i386")
 CFLAGS+= -DPNG_NO_MMX_CODE
 .endif
 
 SRCS=	png.c pngset.c pngget.c pngrutil.c pngtrans.c pngwutil.c \
 	pngread.c pngrio.c pngwio.c pngwrite.c pngrtran.c \
-	pngwtran.c pngmem.c pngerror.c pngpread.c pnggccrd.c
+	pngwtran.c pngmem.c pngerror.c pngpread.c
 
 pngtest: pngtest.o libpng.a
 	${CC} ${CFLAGS} -L. -static -o pngtest pngtest.o -lpng -lz -lm
diff --git a/scripts/makefile.gcmmx b/scripts/makefile.gcmmx
index ff3ac42..dec71c0 100644
--- a/scripts/makefile.gcmmx
+++ b/scripts/makefile.gcmmx
@@ -16,7 +16,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -60,15 +60,15 @@
 
 # Remove -DPNG_THREAD_UNSAFE_OK if you need thread safety
 ### for generic gcc:
-CFLAGS=-DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD -I$(ZLIBINC) -Wall \
-	-O3 $(ALIGN) -funroll-loops \
+CFLAGS=-DPNG_THREAD_UNSAFE_OK -I$(ZLIBINC) -Wall -O \
+	$(ALIGN) -funroll-loops \
 	-fomit-frame-pointer  # $(WARNMORE) -g -DPNG_DEBUG=5
 ### for gcc 2.95.2 on 686:
-#CFLAGS=-DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD -I$(ZLIBINC) -Wall -O3 \
+#CFLAGS=-DPNG_THREAD_UNSAFE_OK -I$(ZLIBINC) -Wall -O \
 #	-mcpu=i686 -malign-double -ffast-math -fstrict-aliasing \
 #	$(ALIGN) -funroll-loops -funroll-all-loops -fomit-frame-pointer 
 ### for gcc 2.7.2.3 on 486 and up:
-#CFLAGS=-DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD -I$(ZLIBINC) -Wall -O3 \
+#CFLAGS=-DPNG_THREAD_UNSAFE_OK -I$(ZLIBINC) -Wall -O \
 #	-m486 -malign-double -ffast-math \
 #	$(ALIGN) -funroll-loops -funroll-all-loops -fomit-frame-pointer 
 
@@ -120,7 +120,7 @@
 	( cat scripts/libpng-config-head.in; \
 	echo prefix=\"$(prefix)\"; \
 	echo I_opts=\"-I$(INCPATH)/$(LIBNAME)\"; \
-	echo cppflags=\"-DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD\"; \
+	echo cppflags=\"-DPNG_THREAD_UNSAFE_OK \"; \
 	echo L_opts=\"-L$(LIBPATH)\"; \
 	echo R_opts=\"-Wl,-rpath,$(LIBPATH)\"; \
 	echo libs=\"-lpng10 -lz -lm\"; \
diff --git a/scripts/makefile.hp64 b/scripts/makefile.hp64
index f4d7cd0..22cc74d 100644
--- a/scripts/makefile.hp64
+++ b/scripts/makefile.hp64
@@ -18,7 +18,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
diff --git a/scripts/makefile.hpgcc b/scripts/makefile.hpgcc
index e357cbf..19b2571 100644
--- a/scripts/makefile.hpgcc
+++ b/scripts/makefile.hpgcc
@@ -1,5 +1,5 @@
 # makefile for libpng on HP-UX using GCC with the HP ANSI/C linker.
-# Copyright (C) 2002, 2006 Glenn Randers-Pehrson
+# Copyright (C) 2002, 2006, 2007 Glenn Randers-Pehrson
 # Copyright (C) 2001, Laurent faillie
 # Copyright (C) 1998, 1999 Greg Roelofs
 # Copyright (C) 1996, 1997 Andreas Dilger
@@ -8,7 +8,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -53,7 +53,7 @@
 
 # for pgcc version 2.95.1, -O3 is buggy; don't use it.
 
-CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops \
+CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops -DPNG_NO_MMX_CODE \
 	$(ALIGN) # $(WARNMORE) -g -DPNG_DEBUG=5
 #LDFLAGS=-L. -Wl,-rpath,. -L$(ZLIBLIB) -Wl,-rpath,$(ZLIBLIB) -lpng10 -lz -lm
 LDFLAGS=-L. -L$(ZLIBLIB) -lpng10 -lz -lm
diff --git a/scripts/makefile.hpux b/scripts/makefile.hpux
index f6fee7e..e1a7c9b 100644
--- a/scripts/makefile.hpux
+++ b/scripts/makefile.hpux
@@ -18,7 +18,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -45,7 +45,7 @@
 MANPATH=$(prefix)/man
 BINPATH=$(exec_prefix)/bin
 
-CFLAGS=-I$(ZLIBINC) -O -Ae +DA1.1 +DS2.0
+CFLAGS=-I$(ZLIBINC) -O -Ae +DA1.1 +DS2.0 -DPNG_NO_MMX_CODE
 # Caution: be sure you have built zlib with the same CFLAGS.
 CCFLAGS=-I$(ZLIBINC) -O -Ae +DA1.1 +DS2.0
 LDFLAGS=-L. -L$(ZLIBLIB) -lpng -lz -lm
diff --git a/scripts/makefile.intel b/scripts/makefile.intel
index 1cabe77..a155ddd 100644
--- a/scripts/makefile.intel
+++ b/scripts/makefile.intel
@@ -8,14 +8,6 @@
 # To use, do "nmake /f scripts\makefile.intel"
 
 
-# ------------------- Intel C/C++ Compiler 4.0 and later -------------------
-
-# Caution: the assembler code was introduced at libpng version 1.0.4 and has
-# not yet been thoroughly tested.
-
-# Use assembler code
-ASMCODE=-DPNG_USE_PNGVCRD
-
 # Where the zlib library and include files are located
 ZLIBLIB=..\zlib
 ZLIBINC=..\zlib
@@ -36,7 +28,7 @@
 
 
 CC=icl -c
-CFLAGS=-O2 -G$(CPU)$(CALLING) -Qip -Qunroll4 -I$(ZLIBINC) $(ASMCODE) -nologo
+CFLAGS=-O2 -G$(CPU)$(CALLING) -Qip -Qunroll4 -I$(ZLIBINC) -nologo
 LD=link
 LDFLAGS=/SUBSYSTEM:CONSOLE /NOLOGO
 
@@ -44,8 +36,7 @@
 
 OBJS=png$(O) pngset$(O) pngget$(O) pngrutil$(O) pngtrans$(O) pngwutil$(O) \
 pngmem$(O) pngpread$(O) pngread$(O) pngerror$(O) pngwrite$(O) \
-pngrtran$(O) pngwtran$(O) pngrio$(O) pngwio$(O) pngvcrd$(O)
-
+pngrtran$(O) pngwtran$(O) pngrio$(O) pngwio$(O) pngvcrd$(O) pnggccrd$(O)
 
 all: test
 
@@ -70,6 +61,9 @@
 pngrutil$(O): png.h pngconf.h
 	$(CC) $(CFLAGS) $*.c $(ERRFILE)
 
+pnggccrd$(O): png.h pngconf.h
+	$(CC) $(CFLAGS) $*.c $(ERRFILE)
+
 pngvcrd$(O): png.h pngconf.h
 	$(CC) $(CFLAGS) $*.c $(ERRFILE)
 
diff --git a/scripts/makefile.linux b/scripts/makefile.linux
index b100eb4..6d7f5da 100644
--- a/scripts/makefile.linux
+++ b/scripts/makefile.linux
@@ -6,7 +6,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -47,7 +47,7 @@
 
 # for pgcc version 2.95.1, -O3 is buggy; don't use it.
 
-CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops \
+CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops -DPNG_NO_MMX_CODE \
 	$(ALIGN) # $(WARNMORE) -g -DPNG_DEBUG=5
 
 LDFLAGS=-L. -Wl,-rpath,. -L$(ZLIBLIB) -Wl,-rpath,$(ZLIBLIB) -lpng10 -lz -lm
diff --git a/scripts/makefile.mingw b/scripts/makefile.mingw
index 2249633..20d1941 100644
--- a/scripts/makefile.mingw
+++ b/scripts/makefile.mingw
@@ -6,15 +6,15 @@
 # Built from makefile.cygwin
 # Copyright (C) 2002, 2006 Soren Anderson, Charles Wilson,
 #    and Glenn Randers-Pehrson, based on makefile for linux-elf w/mmx by:
-# Copyright (C) 1998-2000 Greg Roelofs
+# Copyright (C) 1998-2000, 2007 Greg Roelofs
 # Copyright (C) 1996, 1997 Andreas Dilger
 # For conditions of distribution and use, see copyright notice in png.h
 
+
 # This makefile intends to support building outside the src directory
 # if desired. When invoking it, specify an argument to SRCDIR on the
 # command line that points to the top of the directory where your source
 # is located.
-
 ifdef SRCDIR
 VPATH = $(SRCDIR)
 else
@@ -29,20 +29,24 @@
 # If you're going to install into a temporary location
 # via DESTDIR, $(DESTDIR)$(prefix) must already exist before
 # you execute make install.
-
 DESTDIR=
 
-# To enable assembler optimizations, add '-DPNG_USE_PNGGCCRD' to
-# $CFLAGS, and include pnggccrd.o in $OBJS, below, and in the dependency
-# list at the bottom of this makefile.
-
+# If you're using a cross-compiler, add the appropriate prefix (e.g.,
+# "i386-mingw32msvc-") to the following three commands:
 CC=gcc
+AR=ar
+RANLIB=ranlib
 
+MKDIR_P=/bin/mkdir -pv
+
+
+# To disable assembler optimizations, add '-DPNG_NO_MMX_CODE' to
+# $CFLAGS.  To enable them, add pnggccrd.o to the dependencies.
 
 # Where "make install" puts libpng*.a, *png*.dll, png.h, and pngconf.h
 ifndef prefix
 prefix=/usr
-$(warning You haven't specified a 'prefix=' location. Defaulting to "/usr")
+$(warning "You haven't specified a 'prefix=' location. Defaulting to '/usr'")
 endif
 exec_prefix=$(prefix)
 
@@ -58,25 +62,23 @@
 	-Wmissing-declarations -Wtraditional -Wcast-align \
 	-Wstrict-prototypes -Wmissing-prototypes #-Wconversion
 
-### if you use the asm, add pnggccrd.o to the OBJS list
-###
 ### if you don't need thread safety, but want the asm accel
-#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_THREAD_UNSAFE_OK -DPNG_USE_PNGGCCRD \
-#	$(addprefix -I,$(ZLIBINC)) -Wall -O3 $(ALIGN) -funroll-loops \
+#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_THREAD_UNSAFE_OK \
+#	$(addprefix -I,$(ZLIBINC)) -Wall -O $(ALIGN) -funroll-loops \
 #	-fomit-frame-pointer)  # $(WARNMORE) -g -DPNG_DEBUG=5
 ### if you need thread safety and want (minimal) asm accel
-#CFLAGS= $(strip $(MINGW_CCFLAGS) -DPNG_USE_PNGGCCRD $(addprefix -I,$(ZLIBINC)) \
-#	-Wall -O3 $(ALIGN) -funroll-loops \
+#CFLAGS= $(strip $(MINGW_CCFLAGS) $(addprefix -I,$(ZLIBINC)) \
+#	-Wall -O $(ALIGN) -funroll-loops \
 #	-fomit-frame-pointer)  # $(WARNMORE) -g -DPNG_DEBUG=5
 ### Normal (non-asm) compilation
 CFLAGS= $(strip $(MINGW_CCFLAGS) $(addprefix -I,$(ZLIBINC)) \
-        -Wall -O3 $(ALIGN) -funroll-loops \
+        -Wall -O3 $(ALIGN) -funroll-loops -DPNG_NO_MMX_CODE \
 	-fomit-frame-pointer) # $(WARNMORE) -g -DPNG_DEBUG=5
 
 LIBNAME = libpng10
 PNGMAJ = 0
 MINGDLL = 10
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 SHAREDLIB=libpng$(MINGDLL).dll
@@ -90,10 +92,6 @@
 LDSFLAGS=$(strip -shared -L.  $(MINGW_LDFLAGS))
 LDEXTRA=-Wl,--out-implib=$(IMPLIB) $(addprefix -L,$(ZLIBLIB)) -lz
 
-MKDIR_P=/bin/mkdir -pv
-RANLIB=ranlib
-#RANLIB=echo
-
 INCPATH=$(prefix)/include
 LIBPATH=$(exec_prefix)/lib
 
@@ -155,22 +153,8 @@
 all-static: $(STATLIB) pngtest-stat$(EXE)
 all-shared: $(SHAREDLIB) pngtest$(EXE)
 
-pnggccrd.o: pnggccrd.c png.h pngconf.h
-	@echo ""
-	@echo '    You can ignore the "control reaches end of non-void function"'
-	@echo '    warning and "<variable> defined but not used" warnings:'
-	@echo ""
-	$(CC) -c $(CFLAGS) -o $@ $<
-
-pnggccrd.pic.o:	pnggccrd.c png.h pngconf.h
-	@echo ""
-	@echo '    You can ignore the "control reaches end of non-void function"'
-	@echo '    warning and "<variable> defined but not used" warnings:'
-	@echo ""
-	$(CC) -c $(CFLAGS) -DPNG_BUILD_DLL -o $@ $<
-
 $(STATLIB): $(OBJS)
-	ar rc $@ $(OBJS)
+	$(AR) rc $@ $(OBJS)
 	$(RANLIB) $@
 
 $(SHAREDDEF): scripts/pngw32.def
@@ -186,12 +170,6 @@
 pngtest-stat$(EXE): pngtest.o $(STATLIB)
 	$(CC) -static $(CFLAGS) $< $(LDFLAGS) -o $@
 
-pngtest.pic.o: pngtest.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-pngtest.o: pngtest.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
 test: test-static test-shared
 
 test-static: pngtest-stat$(EXE)
@@ -278,8 +256,8 @@
 
 clean:
 	/bin/rm -f *.pic.o *.o $(STATLIB) $(IMPLIB) $(SHAREDLIB) \
-	pngtest-stat$(EXE) pngtest$(EXE) pngout.png $(SHAREDDEF) \
-	libpng-config libpng.pc pngtesti$(EXE)
+	   pngtest-stat$(EXE) pngtest$(EXE) pngout.png $(SHAREDDEF) \
+	   libpng-config libpng.pc pngtesti$(EXE)
 
 DOCS = ANNOUNCE CHANGES INSTALL KNOWNBUG LICENSE README TODO Y2KINFO
 writelock:
@@ -304,9 +282,9 @@
 pngwtran.o pngwtran.pic.o:	png.h pngconf.h pngwtran.c
 pngwutil.o pngwutil.pic.o:	png.h pngconf.h pngwutil.c
 pngpread.o pngpread.pic.o:	png.h pngconf.h pngpread.c
+# pnggccrd.o pnggccrd.pic.o:	png.h pngconf.h pnggccrd.c
 
-pngtest.o:			png.h pngconf.h pngtest.c
-pngtest-stat.o:			png.h pngconf.h pngtest.c
+pngtest.o pngtest.pic.o:	png.h pngconf.h pngtest.c
 
 
 
diff --git a/scripts/makefile.ne12bsd b/scripts/makefile.ne12bsd
index f1ec718..b9f8753 100644
--- a/scripts/makefile.ne12bsd
+++ b/scripts/makefile.ne12bsd
@@ -14,14 +14,14 @@
 
 LIB=	png12
 SHLIB_MAJOR=	0
-SHLIB_MINOR=	1.0.26
+SHLIB_MINOR=	1.0.27rc1
 SRCS=	pnggccrd.c png.c pngset.c pngget.c pngrutil.c pngtrans.c pngwutil.c \
 		pngread.c pngrio.c pngwio.c pngwrite.c pngrtran.c \
 		pngwtran.c pngmem.c pngerror.c pngpread.c
 INCS=	png.h pngconf.h
 MAN=	libpng.3 libpngpf.3 png.5
 
-CPPFLAGS+=-I${.CURDIR} -DPNG_USE_PNGGCCRD
+CPPFLAGS+=-I${.CURDIR}
 
 # something like this for mmx assembler, but it core dumps for me at the moment
 # .if ${MACHINE_ARCH} == "i386"
diff --git a/scripts/makefile.netbsd b/scripts/makefile.netbsd
index 1c00300..062726a 100644
--- a/scripts/makefile.netbsd
+++ b/scripts/makefile.netbsd
@@ -14,14 +14,14 @@
 
 LIB=	png
 SHLIB_MAJOR=	3
-SHLIB_MINOR=	1.0.26
+SHLIB_MINOR=	1.0.27rc1
 SRCS=	pnggccrd.c png.c pngset.c pngget.c pngrutil.c pngtrans.c pngwutil.c \
 		pngread.c pngrio.c pngwio.c pngwrite.c pngrtran.c \
 		pngwtran.c pngmem.c pngerror.c pngpread.c
 INCS=	png.h pngconf.h
 MAN=	libpng.3 libpngpf.3 png.5
 
-CPPFLAGS+=-I${.CURDIR} -DPNG_USE_PNGGCCRD
+CPPFLAGS+=-I${.CURDIR}
 
 # something like this for mmx assembler, but it core dumps for me at the moment
 # .if ${MACHINE_ARCH} == "i386"
diff --git a/scripts/makefile.nommx b/scripts/makefile.nommx
index ac61052..2470b69 100644
--- a/scripts/makefile.nommx
+++ b/scripts/makefile.nommx
@@ -7,7 +7,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -48,8 +48,7 @@
 
 # for pgcc version 2.95.1, -O3 is buggy; don't use it.
 
-CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops \
-	-DPNG_NO_MMX_CODE \
+CFLAGS=-I$(ZLIBINC) -Wall -O3 -funroll-loops -DPNG_NO_MMX_CODE \
 	$(ALIGN) # $(WARNMORE) -g -DPNG_DEBUG=5
 
 LDFLAGS=-L. -Wl,-rpath,. -L$(ZLIBLIB) -Wl,-rpath,$(ZLIBLIB) -lpng10 -lz -lm
diff --git a/scripts/makefile.openbsd b/scripts/makefile.openbsd
index a79e056..d5db674 100644
--- a/scripts/makefile.openbsd
+++ b/scripts/makefile.openbsd
@@ -8,7 +8,7 @@
 MANDIR= ${PREFIX}/man/cat
 
 SHLIB_MAJOR=	0
-SHLIB_MINOR=	1.0.26
+SHLIB_MINOR=	1.0.27rc1
 
 LIB=	png
 SRCS=	png.c pngerror.c pnggccrd.c pngget.c pngmem.c pngpread.c \
@@ -18,7 +18,7 @@
 HDRS=	png.h pngconf.h
 
 CFLAGS+= -Wall
-CPPFLAGS+= -I${.CURDIR} -DPNG_NO_MMX_CODE -DPNG_USE_PNGGCCRD
+CPPFLAGS+= -I${.CURDIR} -DPNG_NO_MMX_CODE 
 
 NOPROFILE= Yes
 
diff --git a/scripts/makefile.sco b/scripts/makefile.sco
index 4de5d0e..0586320 100644
--- a/scripts/makefile.sco
+++ b/scripts/makefile.sco
@@ -9,7 +9,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -38,7 +38,7 @@
 ZLIBLIB=../zlib
 ZLIBINC=../zlib
 
-CFLAGS= -dy -belf -I$(ZLIBINC) -O3
+CFLAGS= -dy -belf -I$(ZLIBINC) -O3 -DPNG_NO_MMX_CODE
 LDFLAGS=-L. -L$(ZLIBLIB) -lpng10 -lz -lm
 
 INCPATH=$(prefix)/include/libpng
diff --git a/scripts/makefile.sggcc b/scripts/makefile.sggcc
index 692590d..4133a5e 100644
--- a/scripts/makefile.sggcc
+++ b/scripts/makefile.sggcc
@@ -6,7 +6,7 @@
 # Library name:
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -44,7 +44,7 @@
 ABI=
 
 WARNMORE= # -g -DPNG_DEBUG=5
-CFLAGS=$(ABI) -I$(ZLIBINC) -O2 $(WARNMORE) -fPIC -mabi=n32
+CFLAGS=$(ABI) -I$(ZLIBINC) -O $(WARNMORE) -fPIC -mabi=n32 -DPNG_NO_MMX_CODE
 LDFLAGS=$(ABI) -L. -L$(ZLIBLIB) -lpng -lz -lm
 LDSHARED=cc $(ABI) -shared -soname $(LIBSOMAJ) \
   -set_version sgi$(PNGMAJ).0
@@ -73,7 +73,7 @@
 DL=$(DESTDIR)$(LIBPATH)
 DM=$(DESTDIR)$(MANPATH)
 
-OBJS = pnggccrd.o png.o pngset.o pngget.o pngrutil.o pngtrans.o pngwutil.o \
+OBJS =  png.o pngset.o pngget.o pngrutil.o pngtrans.o pngwutil.o \
 	pngread.o pngrio.o pngwio.o pngwrite.o pngrtran.o \
 	pngwtran.o pngmem.o pngerror.o pngpread.o
 
@@ -93,6 +93,7 @@
 	echo prefix=\"$(prefix)\"; \
 	echo I_opts=\"-I$(INCPATH)/$(LIBNAME)\"; \
 	echo ccopts=\"$(ABI)\"; \
+	echo cppflags=\"-DPNG_NO_MMX_CODE\"; \
 	echo ldopts=\"$(ABI)\"; \
 	echo L_opts=\"-L$(LIBPATH)\"; \
 	echo libdir=\"$(LIBPATH)\"; \
@@ -234,5 +235,4 @@
 pngwtran.o: png.h pngconf.h
 pngwutil.o: png.h pngconf.h
 pngpread.o: png.h pngconf.h
-pnggccrd.o: png.h pngconf.h
 
diff --git a/scripts/makefile.sgi b/scripts/makefile.sgi
index ec19f0d..b8d5e25 100644
--- a/scripts/makefile.sgi
+++ b/scripts/makefile.sgi
@@ -6,7 +6,7 @@
 # Library name:
 LIBNAME=libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -45,9 +45,8 @@
 
 WARNMORE=-fullwarn
 # Note: -KPIC is the default anyhow
-#CFLAGS= $(ABI) -I$(ZLIBINC) -O $(WARNMORE) -KPIC -DPNG_USE_PNGGCCRD # -g -DPNG_DEBUG=5
-CFLAGS=$(ABI) -I$(ZLIBINC) -O $(WARNMORE) -DPNG_USE_PNGGCCRD \
-  -DPNG_NO_MMX_CODE
+#CFLAGS= $(ABI) -I$(ZLIBINC) -O $(WARNMORE) -KPIC -DPNG_NO_MMX_CODE # -g -DPNG_DEBUG=5
+CFLAGS=$(ABI) -I$(ZLIBINC) -O $(WARNMORE) -DPNG_NO_MMX_CODE
 LDFLAGS_A=$(ABI) -L. -L$(ZLIBLIB) -lpng10 -lz -lm
 LDFLAGS=$(ABI) -L. -L$(ZLIBLIB) -lpng -lz -lm
 LDSHARED=cc $(ABI) -shared -soname $(LIBSOMAJ) \
@@ -77,7 +76,7 @@
 DL=$(DESTDIR)$(LIBPATH)
 DM=$(DESTDIR)$(MANPATH)
 
-OBJS = pnggccrd.o png.o pngset.o pngget.o pngrutil.o pngtrans.o pngwutil.o \
+OBJS =  png.o pngset.o pngget.o pngrutil.o pngtrans.o pngwutil.o \
 	pngread.o pngrio.o pngwio.o pngwrite.o pngrtran.o \
 	pngwtran.o pngmem.o pngerror.o pngpread.o
 
@@ -96,7 +95,7 @@
 	( cat scripts/libpng-config-head.in; \
 	echo prefix=\"$(prefix)\"; \
 	echo I_opts=\"-I$(INCPATH)/$(LIBNAME)\"; \
-	echo cppflags=\"-DPNG_USE_PNGGCCRD -DPNG_NO_MMX_CODE\"; \
+	echo cppflags=\"-DPNG_NO_MMX_CODE\"; \
 	echo ccopts=\"$(ABI)\"; \
 	echo ldopts=\"$(ABI)\"; \
 	echo L_opts=\"-L$(LIBPATH)\"; \
@@ -239,5 +238,4 @@
 pngwtran.o: png.h pngconf.h
 pngwutil.o: png.h pngconf.h
 pngpread.o: png.h pngconf.h
-pnggccrd.o: png.h pngconf.h
 
diff --git a/scripts/makefile.so9 b/scripts/makefile.so9
index 284a5e2..1b7cb9c 100644
--- a/scripts/makefile.so9
+++ b/scripts/makefile.so9
@@ -8,7 +8,7 @@
 
 # Library name:
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 LIBNAME = libpng10
 
@@ -47,8 +47,8 @@
 #WARNMORE=-Wwrite-strings -Wpointer-arith -Wshadow \
 	-Wmissing-declarations -Wtraditional -Wcast-align \
 	-Wstrict-prototypes -Wmissing-prototypes #-Wconversion
-#CFLAGS=-I$(ZLIBINC) -Wall -O3 $(WARNMORE) -g -DPNG_DEBUG=5
-CFLAGS=-I$(ZLIBINC) -O3
+#CFLAGS=-I$(ZLIBINC) -Wall -O3 $(WARNMORE) -g -DPNG_DEBUG=5 -DPNG_NO_MMX_CODE
+CFLAGS=-I$(ZLIBINC) -O3 -DPNG_NO_MMX_CODE
 LDFLAGS=-L. -R. -L$(ZLIBLIB) -R$(ZLIBLIB) -lpng10 -lz -lm
 
 INCPATH=$(prefix)/include
diff --git a/scripts/makefile.solaris b/scripts/makefile.solaris
index dbba2d1..d151c8b 100644
--- a/scripts/makefile.solaris
+++ b/scripts/makefile.solaris
@@ -8,7 +8,7 @@
 # Library name:
 LIBNAME = libpng10
 PNGMAJ = 0
-PNGMIN = 1.0.26
+PNGMIN = 1.0.27rc1
 PNGVER = $(PNGMAJ).$(PNGMIN)
 
 # Shared library names:
@@ -44,6 +44,7 @@
 	-Wmissing-declarations -Wtraditional -Wcast-align \
 	-Wstrict-prototypes -Wmissing-prototypes #-Wconversion
 CFLAGS=-I$(ZLIBINC) -Wall -O \
+	-DPNG_NO_MMX_CODE \
 	# $(WARNMORE) -g -DPNG_DEBUG=5
 LDFLAGS=-L. -R. -L$(ZLIBLIB) -R$(ZLIBLIB) -lpng10 -lz -lm
 
@@ -91,7 +92,7 @@
 	( cat scripts/libpng-config-head.in; \
 	echo prefix=\"$(prefix)\"; \
 	echo I_opts=\"-I$(INCPATH)/$(LIBNAME)\"; \
-	echo cppflags=\"-DPNG_USE_PNGGCCRD -DPNG_NO_MMX_CODE\"; \
+	echo cppflags=\"-DPNG_NO_MMX_CODE\"; \
 	echo L_opts=\"-L$(LIBPATH)\"; \
 	echo R_opts=\"-R$(LIBPATH)\"; \
 	echo libs=\"-lpng10 -lz -lm\"; \
diff --git a/scripts/makefile.solaris-x86 b/scripts/makefile.solaris-x86
new file mode 100644
index 0000000..4771ab2
--- /dev/null
+++ b/scripts/makefile.solaris-x86
@@ -0,0 +1,245 @@
+# makefile for libpng on Solaris 2.x with gcc
+# Copyright (C) 2004, 2006, 2007 Glenn Randers-Pehrson
+# Contributed by William L. Sebok, based on makefile.linux
+# Copyright (C) 1998 Greg Roelofs
+# Copyright (C) 1996, 1997 Andreas Dilger
+# For conditions of distribution and use, see copyright notice in png.h
+
+# Library name:
+LIBNAME = libpng10
+PNGMAJ = 0
+PNGMIN = 1.0.27rc1
+PNGVER = $(PNGMAJ).$(PNGMIN)
+
+# Shared library names:
+LIBSO=$(LIBNAME).so
+LIBSOMAJ=$(LIBNAME).so.$(PNGMAJ)
+LIBSOVER=$(LIBNAME).so.$(PNGVER)
+OLDSO=libpng.so
+OLDSOMAJ=libpng.so.2
+OLDSOVER=libpng.so.2.$(PNGMIN)
+
+# Utilities:
+AR_RC=ar rc
+CC=gcc
+MKDIR_P=mkdir -p
+LN_SF=ln -f -s
+RANLIB=echo
+RM_F=/bin/rm -f
+
+# Where make install puts libpng.a, libpng10.so*, and png.h
+prefix=/usr/local
+exec_prefix=$(prefix)
+
+# Where the zlib library and include files are located
+# Changing these to ../zlib poses a security risk.  If you want
+# to have zlib in an adjacent directory, specify the full path instead of "..".
+#ZLIBLIB=../zlib
+#ZLIBINC=../zlib
+
+ZLIBLIB=/usr/local/lib
+ZLIBINC=/usr/local/include
+
+WARNMORE=-Wwrite-strings -Wpointer-arith -Wshadow \
+	-Wmissing-declarations -Wtraditional -Wcast-align \
+	-Wstrict-prototypes -Wmissing-prototypes #-Wconversion
+CFLAGS=-I$(ZLIBINC) -Wall -O \
+	# $(WARNMORE) -g -DPNG_DEBUG=5
+LDFLAGS=-L. -R. -L$(ZLIBLIB) -R$(ZLIBLIB) -lpng10 -lz -lm
+
+INCPATH=$(prefix)/include
+LIBPATH=$(exec_prefix)/lib
+MANPATH=$(prefix)/man
+BINPATH=$(exec_prefix)/bin
+
+# override DESTDIR= on the make install command line to easily support
+# installing into a temporary location.  Example:
+#
+#    make install DESTDIR=/tmp/build/libpng
+#
+# If you're going to install into a temporary location
+# via DESTDIR, $(DESTDIR)$(prefix) must already exist before
+# you execute make install.
+DESTDIR=
+
+DB=$(DESTDIR)$(BINPATH)
+DI=$(DESTDIR)$(INCPATH)
+DL=$(DESTDIR)$(LIBPATH)
+DM=$(DESTDIR)$(MANPATH)
+
+OBJS = png.o pngset.o pngget.o pngrutil.o pngtrans.o pngwutil.o \
+	pngread.o pngrio.o pngwio.o pngwrite.o pngrtran.o \
+	pngwtran.o pngmem.o pngerror.o pngpread.o pnggccrd.o
+
+OBJSDLL = $(OBJS:.o=.pic.o)
+
+.SUFFIXES:      .c .o .pic.o
+
+.c.pic.o:
+	$(CC) -c $(CFLAGS) -fPIC -o $@ $*.c
+
+all: libpng.a $(LIBSO) pngtest libpng.pc libpng-config
+
+libpng.a: $(OBJS)
+	$(AR_RC) $@ $(OBJS)
+	$(RANLIB) $@
+
+libpng.pc:
+	cat scripts/libpng.pc.in | sed -e s\!@PREFIX@!$(prefix)! > libpng.pc
+
+libpng-config:
+	( cat scripts/libpng-config-head.in; \
+	echo prefix=\"$(prefix)\"; \
+	echo I_opts=\"-I$(INCPATH)/$(LIBNAME)\"; \
+	echo cppflags=\"\"; \
+	echo L_opts=\"-L$(LIBPATH)\"; \
+	echo R_opts=\"-R$(LIBPATH)\"; \
+	echo libs=\"-lpng10 -lz -lm\"; \
+	cat scripts/libpng-config-body.in ) > libpng-config
+	chmod +x libpng-config
+
+$(LIBSO): $(LIBSOMAJ)
+	$(LN_SF) $(LIBSOMAJ) $(LIBSO)
+
+$(LIBSOMAJ): $(LIBSOVER)
+	$(LN_SF) $(LIBSOVER) $(LIBSOMAJ)
+
+$(LIBSOVER): $(OBJSDLL)
+	@case "`type ld`" in *ucb*) \
+	echo; \
+	echo '## WARNING:'; \
+	echo '## The commands "CC" and "LD" must NOT refer to /usr/ucb/cc'; \
+	echo '## and /usr/ucb/ld.  If they do, you need to adjust your PATH'; \
+	echo '## environment variable to put /usr/ccs/bin ahead of /usr/ucb.'; \
+	echo '## The environment variable LD_LIBRARY_PATH should not be set'; \
+	echo '## at all.  If it is, things are likely to break because of'; \
+	echo '## the libucb dependency that is created.'; \
+	echo; \
+	;; \
+	esac
+	$(LD) -G -h $(LIBSOMAJ) \
+	 -o $(LIBSOVER) $(OBJSDLL)
+
+$(OLDSOVER): $(OBJSDLL)
+	$(LD) -G -h $(OLDSOMAJ) \
+	 -o $(OLDSOVER) $(OBJSDLL)
+
+pngtest: pngtest.o $(LIBSO)
+	$(CC) -o pngtest $(CFLAGS) pngtest.o $(LDFLAGS)
+
+test: pngtest
+	./pngtest
+
+install-headers: png.h pngconf.h
+	-@if [ ! -d $(DI) ]; then $(MKDIR_P) $(DI); fi
+	-@if [ ! -d $(DI)/$(LIBNAME) ]; then $(MKDIR_P) $(DI)/$(LIBNAME); fi
+	cp png.h pngconf.h $(DI)/$(LIBNAME)
+	chmod 644 $(DI)/$(LIBNAME)/png.h $(DI)/$(LIBNAME)/pngconf.h
+	-@$(RM_F) $(DI)/png.h $(DI)/pngconf.h
+	-@$(RM_F) $(DI)/libpng
+	(cd $(DI); $(LN_SF) $(LIBNAME) libpng; $(LN_SF) $(LIBNAME)/* .)
+
+install-static: install-headers libpng.a
+	-@if [ ! -d $(DL) ]; then $(MKDIR_P) $(DL); fi
+	cp libpng.a $(DL)/$(LIBNAME).a
+	chmod 644 $(DL)/$(LIBNAME).a
+	-@$(RM_F) $(DL)/libpng.a
+	(cd $(DL); $(LN_SF) $(LIBNAME).a libpng.a)
+
+install-shared: install-headers $(LIBSOVER) libpng.pc \
+	$(OLDSOVER)
+	-@if [ ! -d $(DL) ]; then $(MKDIR_P) $(DL); fi
+	-@$(RM_F) $(DL)/$(LIBSOVER)* $(DL)/$(LIBSO)
+	-@$(RM_F) $(DL)/$(LIBSOMAJ)
+	-@$(RM_F) $(DL)/$(OLDSO)
+	-@$(RM_F) $(DL)/$(OLDSOMAJ)
+	-@$(RM_F) $(DL)/$(OLDSOVER)*
+	cp $(LIBSOVER) $(DL)
+	cp $(OLDSOVER) $(DL)
+	chmod 755 $(DL)/$(LIBSOVER)
+	chmod 755 $(DL)/$(OLDSOVER)
+	(cd $(DL); \
+	$(LN_SF) $(OLDSOVER) $(OLDSOMAJ); \
+	$(LN_SF) $(OLDSOMAJ) $(OLDSO); \
+	$(LN_SF) $(LIBSOVER) $(LIBSO); \
+	$(LN_SF) $(LIBSOVER) $(LIBSOMAJ))
+	-@if [ ! -d $(DL)/pkgconfig ]; then $(MKDIR_P) $(DL)/pkgconfig; fi
+	-@$(RM_F) $(DL)/pkgconfig/$(LIBNAME).pc
+	-@$(RM_F) $(DL)/pkgconfig/libpng.pc
+	cp libpng.pc $(DL)/pkgconfig/$(LIBNAME).pc
+	chmod 644 $(DL)/pkgconfig/$(LIBNAME).pc
+	(cd $(DL)/pkgconfig; $(LN_SF) $(LIBNAME).pc libpng.pc)
+
+install-man: libpng.3 libpngpf.3 png.5
+	-@if [ ! -d $(DM) ]; then $(MKDIR_P) $(DM); fi
+	-@if [ ! -d $(DM)/man3 ]; then $(MKDIR_P) $(DM)/man3; fi
+	-@$(RM_F) $(DM)/man3/libpng.3
+	-@$(RM_F) $(DM)/man3/libpngpf.3
+	cp libpng.3 $(DM)/man3
+	cp libpngpf.3 $(DM)/man3
+	-@if [ ! -d $(DM)/man5 ]; then $(MKDIR_P) $(DM)/man5; fi
+	-@$(RM_F) $(DM)/man5/png.5
+	cp png.5 $(DM)/man5
+
+install-config: libpng-config
+	-@if [ ! -d $(DB) ]; then $(MKDIR_P) $(DB); fi
+	-@$(RM_F) $(DB)/libpng-config
+	-@$(RM_F) $(DB)/$(LIBNAME)-config
+	cp libpng-config $(DB)/$(LIBNAME)-config
+	chmod 755 $(DB)/$(LIBNAME)-config
+	(cd $(DB); $(LN_SF) $(LIBNAME)-config libpng-config)
+
+install: install-static install-shared install-man install-config
+
+# If you installed in $(DESTDIR), test-installed won't work until you
+# move the library to its final location.  Use test-dd to test it
+# before then.
+
+test-dd:
+	echo
+	echo Testing installed dynamic shared library in $(DL).
+	$(CC) -I$(DI) -I$(ZLIBINC) \
+	   `$(BINPATH)/$(LIBNAME)-config --cflags` pngtest.c \
+	   -o pngtestd `$(BINPATH)/$(LIBNAME)-config --ldflags` \
+	   -L$(DL) -L$(ZLIBLIB) -R$(ZLIBLIB) -R$(DL)
+	./pngtestd pngtest.png
+
+test-installed:
+	echo
+	echo Testing installed dynamic shared library.
+	$(CC) -I$(ZLIBINC) \
+	   `$(BINPATH)/$(LIBNAME)-config --cflags` pngtest.c \
+	   -o pngtesti `$(BINPATH)/$(LIBNAME)-config --ldflags` \
+	   -L$(ZLIBLIB) -R$(ZLIBLIB)
+	./pngtesti pngtest.png
+
+clean:
+	$(RM_F) *.o libpng.a pngtest pngtesti pngtestd pngout.png \
+	libpng-config $(LIBSO) $(LIBSOMAJ)* \
+	$(OLDSOVER) \
+	libpng.pc
+
+DOCS = ANNOUNCE CHANGES INSTALL KNOWNBUG LICENSE README TODO Y2KINFO
+writelock:
+	chmod a-w *.[ch35] $(DOCS) scripts/*
+
+# DO NOT DELETE THIS LINE -- make depend depends on it.
+
+png.o png.pic.o: png.h pngconf.h
+pngerror.o pngerror.pic.o: png.h pngconf.h
+pngrio.o pngrio.pic.o: png.h pngconf.h
+pngwio.o pngwio.pic.o: png.h pngconf.h
+pngmem.o pngmem.pic.o: png.h pngconf.h
+pngset.o pngset.pic.o: png.h pngconf.h
+pngget.o pngget.pic.o: png.h pngconf.h
+pnggccrd.o pnggccrd.pic.o: png.h pngconf.h
+pngread.o pngread.pic.o: png.h pngconf.h
+pngrtran.o pngrtran.pic.o: png.h pngconf.h
+pngrutil.o pngrutil.pic.o: png.h pngconf.h
+pngtrans.o pngtrans.pic.o: png.h pngconf.h
+pngwrite.o pngwrite.pic.o: png.h pngconf.h
+pngwtran.o pngwtran.pic.o: png.h pngconf.h
+pngwutil.o pngwutil.pic.o: png.h pngconf.h
+pngpread.o pngpread.pic.o: png.h pngconf.h
+
+pngtest.o: png.h pngconf.h
diff --git a/scripts/makefile.vcwin32 b/scripts/makefile.vcwin32
index d23d812..fc6ece6 100644
--- a/scripts/makefile.vcwin32
+++ b/scripts/makefile.vcwin32
@@ -11,7 +11,7 @@
 CC = cl
 LD = link
 AR = lib
-CFLAGS  = -nologo -MD -O2 -W3 -I..\zlib
+CFLAGS  = -nologo -DPNG_NO_MMX_CODE -MD -O2 -W3 -I..\zlib
 LDFLAGS = -nologo
 ARFLAGS = -nologo
 RM = del
diff --git a/scripts/pngos2.def b/scripts/pngos2.def
index 2763842..f211051 100644
--- a/scripts/pngos2.def
+++ b/scripts/pngos2.def
@@ -2,7 +2,7 @@
 ; PNG.LIB module definition file for OS/2
 ;----------------------------------------
 
-; Version 1.0.26
+; Version 1.0.27rc1
 
 LIBRARY		PNG
 DESCRIPTION	"PNG image compression library for OS/2"
diff --git a/scripts/pngw32.def b/scripts/pngw32.def
index 0f50020..d2db008 100644
--- a/scripts/pngw32.def
+++ b/scripts/pngw32.def
@@ -5,7 +5,7 @@
 LIBRARY
 
 EXPORTS
-;Version 1.0.26
+;Version 1.0.27rc1
   png_build_grayscale_palette  @1
   png_check_sig        @2
   png_chunk_error      @3