MultiSource/Applications/ClamAV/libclamav_pdf.c - third_party/llvm-test-suite - Git at Google

 /*
  *  Copyright (C) 2005-2007 Nigel Horne <njh@bandsman.co.uk>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * TODO: Embedded fonts
  * TODO: Predictor image handling
  */
 static	char	const	rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";

 #if HAVE_CONFIG_H
 #include "clamav-config.h"
 #endif

 #ifdef	HAVE_MMAP
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ctype.h>
 #include <string.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <errno.h>
 #ifdef	HAVE_LIMITS_H
 #include <limits.h>
 #endif
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif

 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
 #endif

 #ifdef HAVE_ZLIB_H
 #include <zlib.h>
 #endif

 #ifdef	C_WINDOWS
 #include <io.h>
 #endif

 #include "clamav.h"
 #include "others.h"
 #include "mbox.h"
 #include "pdf.h"

 #ifdef	CL_DEBUG
 /*#define	SAVE_TMP	/* Save the file being worked on in tmp */
 #endif

 /*
  * Forward declarations of the file-local helpers, all defined further down:
  *	try_flatedecode		inflate using real_len, retrying with
  *				calculated_len if that fails
  *	flatedecode		inflate len bytes of buf and write them to fout
  *	ascii85decode		decode ASCII85 data from buf into output
  *	pdf_nextlinestart	find the start of the line following ptr
  *	pdf_nextobject		find the start of the next PDF object
  *	cli_pmemstr		locate needle (ns bytes) in haystack (hs bytes)
  */
 static	int	try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx);
 static	int	flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx);
 static	int	ascii85decode(const char *buf, off_t len, unsigned char *output);
 static	const	char	*pdf_nextlinestart(const char *ptr, size_t len);
 static	const	char	*pdf_nextobject(const char *ptr, size_t len);
 static	const	char	*cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);

 /*
  * Walk the indirect objects of a PDF file and extract the contents of every
  * stream object into its own temporary file (pdfXXXXXX) inside dir.
  * FlateDecode and ASCII85Decode filters are undone on the way; encrypted
  * files, embedded fonts and streams using a Predictor are not handled.
  * Streams whose MD5 digest has already been seen are removed again.
  *
  *	dir	directory that receives the extracted files
  *	desc	open descriptor of the PDF file, it is fstat()ed and mmap()ed
  *	ctx	scan context, ctx->limits (may be NULL) bounds the extraction
  *
  * Returns CL_CLEAN when the file is empty or was walked without error,
  * CL_EOPEN if fstat() fails, CL_EMEM on allocation/mmap failure, CL_EFORMAT
  * for files that don't look like (supported) PDF, CL_ETMPFILE if a temporary
  * file can't be created, CL_EMAXFILES once limits->maxfiles is reached, or
  * the status of try_flatedecode() when inflating a stream fails.
  *
  * TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
  */
 int
 cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
 {
 	off_t size;	/* total number of bytes in the file */
 	off_t bytesleft, trailerlength;
 	char *buf, *alloced;	/* start of memory mapped area */
 	const char *p, *q, *trailerstart;
 	const char *base, *fileend;	/* bounds of the in-memory copy */
 	const char *xrefstart;	/* cross reference table */
 	const struct cl_limits *limits;
 	/*size_t xreflength;*/
 	table_t *md5table;
 	int printed_predictor_message, printed_embedded_font_message, rc;
 	unsigned int files;
 	struct stat statb;

 	cli_dbgmsg("in cli_pdf(%s)\n", dir);

 	if(fstat(desc, &statb) < 0)
 		return CL_EOPEN;

 	size = statb.st_size;

 	if(size == 0)
 		return CL_CLEAN;

 	if(size <= 7)	/* doesn't even include the file header */
 		return CL_EFORMAT;

 	p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
 	if(buf == MAP_FAILED)
 		return CL_EMEM;

 	alloced = cli_malloc(size);
 	if(alloced) {
 		/*
 		 * FIXME: now I have this, there's no need for the lack of
 		 *	support on systems without mmap, e.g. cygwin
 		 */
 		memcpy(alloced, buf, size);
 		munmap(buf, size);
 		p = alloced;
 	}

 	/*
 	 * Remember where the data starts and ends, every length taken from
 	 * the file itself is checked against these
 	 */
 	base = p;
 	fileend = &base[size];

 	cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);

 	/* Lines are terminated by \r, \n or both */

 	/* File Header */
 	if(memcmp(p, "%PDF-1.", 7) != 0) {
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		return CL_EFORMAT;
 	}

 #if	0
 	q = pdf_nextlinestart(&p[6], size - 6);
 	if(q == NULL) {
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		return CL_EFORMAT;
 	}
 	bytesleft = size - (long)(q - p);
 	p = q;
 #else
 	p = &p[6];
 	bytesleft = size - 6;
 #endif

 	/* Find the file trailer */
 	for(q = &p[bytesleft - 6]; q > p; --q)
 		if(memcmp(q, "%%EOF", 5) == 0)
 			break;

 	if(q <= p) {
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		return CL_EFORMAT;
 	}

 	for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
 		if(memcmp(trailerstart, "trailer", 7) == 0)
 			break;

 	/*
 	 * q points to the end of the trailer section
 	 */
 	trailerlength = (long)(q - trailerstart);
 	if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
 		/*
 		 * This tends to mean that the file is, in effect, read-only
 		 */
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		cli_warnmsg("Encrypted PDF files not yet supported\n");
 		return CL_EFORMAT;
 	}

 	/*
 	 * not true, since edits may put data after the trailer
 	bytesleft -= trailerlength;
 	 */

 	/*
 	 * FIXME: Handle more than one xref section in the xref table
 	 */
 	for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
 		if(memcmp(xrefstart, "xref", 4) == 0)
 			/*
 			 * Make sure it's the start of the line, not a startxref
 			 * token
 			 */
 			if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
 				break;

 	/*
 	 * "<=" rather than "==": in a tiny file the trailer search can start
 	 * (and therefore leave xrefstart) before p
 	 */
 	if(xrefstart <= p) {
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		return CL_EFORMAT;
 	}

 	printed_predictor_message = printed_embedded_font_message = 0;

 	md5table = tableCreate();
 	if(md5table == NULL) {
 		if(alloced)
 			free(alloced);
 		else
 			munmap(buf, size);
 		return CL_EMEM;
 	}
 	/*
 	 * not true, since edits may put data after the trailer
 	xreflength = (size_t)(trailerstart - xrefstart);
 	bytesleft -= xreflength;
 	 */

 	rc = CL_CLEAN;
 	files = 0;
 	limits = ctx->limits;

 	/*
 	 * The body section consists of a sequence of indirect objects
 	 */
 	while((p < xrefstart) && (rc == CL_CLEAN) &&
 	      ((q = pdf_nextobject(p, bytesleft)) != NULL)) {
 		int is_ascii85decode, is_flatedecode, fout, len, has_cr;
 		/*int object_number, generation_number;*/
 		const char *objstart, *objend, *streamstart, *streamend;
 		char *md5digest;
 		unsigned long length, objlen, real_streamlen, calculated_streamlen;
 		unsigned long avail;	/* bytes between streamstart and fileend */
 		int is_embedded_font, predictor;
 		char fullname[NAME_MAX + 1];

 		if(q == xrefstart)
 			break;
 		if(memcmp(q, "xref", 4) == 0)
 			break;

 		/*object_number = atoi(q);*/
 		bytesleft -= (off_t)(q - p);
 		p = q;

 		if(memcmp(q, "endobj", 6) == 0)
 			continue;
 		if(!isdigit((unsigned char)*q)) {
 			cli_warnmsg("cli_pdf: Object number missing\n");
 			rc = CL_EFORMAT;
 			break;
 		}
 		q = pdf_nextobject(p, bytesleft);
 		if((q == NULL) || !isdigit((unsigned char)*q)) {
 			cli_warnmsg("cli_pdf: Generation number missing\n");
 			rc = CL_EFORMAT;
 			break;
 		}
 		/*generation_number = atoi(q);*/
 		bytesleft -= (off_t)(q - p);
 		p = q;

 		q = pdf_nextobject(p, bytesleft);
 		if((q == NULL) || (memcmp(q, "obj", 3) != 0)) {
 			cli_warnmsg("Indirect object missing \"obj\"\n");
 			rc = CL_EFORMAT;
 			break;
 		}

 		bytesleft -= (off_t)((q - p) + 3);
 		objstart = p = &q[3];
 		objend = cli_pmemstr(p, bytesleft, "endobj", 6);
 		if(objend == NULL) {
 			cli_dbgmsg("No matching endobj\n");
 			break;
 		}
 		bytesleft -= (off_t)((objend - p) + 6);
 		p = &objend[6];
 		objlen = (unsigned long)(objend - objstart);

 		/* Is this object a stream? */
 		streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
 		if(streamstart == NULL)
 			continue;

 		is_embedded_font = length = is_ascii85decode =
 			is_flatedecode = 0;
 		predictor = 1;

 		/*
 		 * Walk the names in the stream dictionary
 		 * TODO: handle F and FFilter?
 		 */
 		q = objstart;
 		while(q < streamstart) {
 			if(*q == '/') {	/* name object */
 				/*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/
 				if(strncmp(++q, "Length ", 7) == 0) {
 					q += 7;
 					length = atoi(q);
 					while(isdigit((unsigned char)*q))
 						q++;
 					/*
 					 * Note: incremental updates are not
 					 *	supported
 					 */
 					if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
 						const char *r;
 						char b[14];
 						size_t blen;

 						q += 4;
 						cli_dbgmsg("Length is in indirect obj %ld\n",
 							length);
 						snprintf(b, sizeof(b),
 							"\n%ld 0 obj", length);
 						blen = strlen(b);
 						/*
 						 * What was parsed is an object
 						 * number, not a length: the
 						 * length stays unknown (0)
 						 * unless that object is found
 						 */
 						length = 0;
 						r = cli_pmemstr(base, size, b, blen);
 						if(r == NULL) {
 							b[0] = '\r';
 							r = cli_pmemstr(base, size, b, blen);
 						}
 						if(r) {
 							/*
 							 * Step onto the last byte
 							 * of "obj" and look for
 							 * the token after it, in
 							 * what is left of the file
 							 */
 							r += blen - 1;
 							r = pdf_nextobject(r, (size_t)(fileend - r));
 							if(r) {
 								length = atoi(r);
 								cli_dbgmsg("length in '%s' %ld\n",
 									&b[1],
 									length);
 							}
 						} else
 							cli_warnmsg("Couldn't find '%s'\n",
 								&b[1]);
 					}
 					q--;
 				} else if(strncmp(q, "Length2 ", 8) == 0)
 					is_embedded_font = 1;
 				else if(strncmp(q, "Predictor ", 10) == 0) {
 					q += 10;
 					predictor = atoi(q);
 					while(isdigit((unsigned char)*q))
 						q++;
 					q--;
 				} else if(strncmp(q, "FlateDecode", 11) == 0) {
 					is_flatedecode = 1;
 					q += 11;
 				} else if(strncmp(q, "ASCII85Decode", 13) == 0) {
 					is_ascii85decode = 1;
 					q += 13;
 				}
 			}
 			q = pdf_nextobject(q, (size_t)(streamstart - q));
 			if(q == NULL)
 				break;
 		}

 		if(is_embedded_font) {
 			/*
 			 * Need some documentation, the only I can find a
 			 * reference to is not free, if some kind soul wishes
 			 * to donate a copy, please contact me!
 			 * (http://safari.adobepress.com/0321304748)
 			 */
 			if(!printed_embedded_font_message) {
 				cli_dbgmsg("Embedded fonts not yet supported\n");
 				printed_embedded_font_message = 1;
 			}
 			continue;
 		}
 		if(predictor > 1) {
 			/*
 			 * Needs some thought
 			 */
 			if(!printed_predictor_message) {
 				cli_dbgmsg("Predictor %d not honoured for embedded image\n",
 					predictor);
 				printed_predictor_message = 1;
 			}
 			continue;
 		}

 		/* objend points to the end of the object (start of "endobj") */
 		streamstart += 6;	/* go past the word "stream" */
 		len = (int)(objend - streamstart);
 		q = pdf_nextlinestart(streamstart, len);
 		if(q == NULL)
 			break;
 		len -= (int)(q - streamstart);
 		streamstart = q;
 		streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
 		if(streamend == NULL) {
 			streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
 			if(streamend == NULL) {
 				cli_dbgmsg("No endstream\n");
 				break;
 			}
 			has_cr = 1;
 		} else
 			has_cr = 0;
 		snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir);
 #if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
 		fout = mkstemp(fullname);
 #elif	defined(C_WINDOWS)
 		if(_mktemp(fullname) == NULL) {
 			/* mktemp only allows 26 files */
 			char *name = cli_gentemp(dir);
 			if(name == NULL)
 				fout = -1;
 			else {
 				strcpy(fullname, name);
 				free(name);
 				fout = open(fullname,
 					O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
 			}
 		} else
 			fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
 #else
 		mktemp(fullname);
 		fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
 #endif

 		if(fout < 0) {
 			cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno));
 			rc = CL_ETMPFILE;
 			break;
 		}

 		/*
 		 * Calculate the length ourself, the Length parameter is often
 		 * wrong
 		 */
 		if((*--streamend != '\n') && (*streamend != '\r'))
 			streamend++;
 		else if(has_cr && (*--streamend != '\r'))
 			streamend++;

 		if(streamend <= streamstart) {
 			close(fout);
 			cli_dbgmsg("Empty stream\n");
 			unlink(fullname);
 			continue;
 		}
 		calculated_streamlen = (int)(streamend - streamstart);
 		real_streamlen = length;

 		if(calculated_streamlen != real_streamlen)
 			cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");

 		cli_dbgmsg("length %ld, calculated_streamlen %ld isFlate %d isASCII85 %d\n",
 			length, calculated_streamlen,
 			is_flatedecode, is_ascii85decode);

 		/*
 		 * The Length comes from the (untrusted) file: never let it
 		 * reach beyond the data we hold, and fall back to our own
 		 * measurement when it is missing or couldn't be resolved
 		 */
 		avail = (unsigned long)(fileend - streamstart);
 		if(real_streamlen == 0)
 			real_streamlen = calculated_streamlen;
 		else if(real_streamlen > avail)
 			real_streamlen = avail;

 #if	0
 		/* FIXME: this isn't right... */
 		if(length)
 			/*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
 			streamlen = MIN(length, streamlen);
 #endif

 		if(is_ascii85decode) {
 			/*
 			 * Each input byte yields at most 4 output bytes ('z'),
 			 * so 5 times the input is always enough
 			 */
 			unsigned char *tmpbuf = cli_malloc(calculated_streamlen * 5);
 			int ret;

 			if(tmpbuf == NULL) {
 				close(fout);
 				unlink(fullname);
 				rc = CL_EMEM;
 				continue;
 			}

 			ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);

 			if(ret == -1) {
 				free(tmpbuf);
 				close(fout);
 				unlink(fullname);
 				rc = CL_EFORMAT;
 				continue;
 			}
 			if(ret) {
 				unsigned char *t;

 				real_streamlen = ret;
 				/*
 				 * free unused trailing bytes; the size to
 				 * keep is the decoded one, not the encoded
 				 */
 				t = (unsigned char *)cli_realloc(tmpbuf,
 					real_streamlen);
 				if(t == NULL) {
 					free(tmpbuf);
 					close(fout);
 					unlink(fullname);
 					rc = CL_EMEM;
 					continue;
 				}
 				tmpbuf = t;
 				/*
 				 * Note that it will probably be both
 				 * ascii85encoded and flateencoded
 				 */
 				if(is_flatedecode)
 					rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
 				else
 					/* save the decoded data, not the source */
 					cli_writen(fout, (const char *)tmpbuf, real_streamlen);
 			}
 			free(tmpbuf);
 		} else if(is_flatedecode)
 			rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);

 		else {
 			cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
 				(unsigned long)real_streamlen);
 			cli_writen(fout, (const char *)streamstart, real_streamlen);
 		}

 		close(fout);
 		md5digest = cli_md5file(fullname);
 		/*
 		 * NOTE(review): assumes cli_md5file() can return NULL on
 		 * failure; without a digest the file is simply kept
 		 */
 		if(md5digest) {
 			if(tableFind(md5table, md5digest) >= 0) {
 				cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
 				unlink(fullname);
 			} else
 				tableInsert(md5table, md5digest, 1);
 			free(md5digest);
 		}
 		cli_dbgmsg("cli_pdf: extracted file %d to %s\n", ++files,
 			fullname);
 		if(limits && limits->maxfiles && (files >= limits->maxfiles)) {
 			/* Bug 698 */
 			cli_dbgmsg("cli_pdf: number of files exceeded %u\n", limits->maxfiles);
 			rc = CL_EMAXFILES;
 		}
 	}

 	if(alloced)
 		free(alloced);
 	else
 		munmap(buf, size);

 	tableDestroy(md5table);

 	cli_dbgmsg("cli_pdf: returning %d\n", rc);
 	return rc;
 }

 /*
  * Inflate a flate stream, coping with a wrong Length field: the first
  * attempt uses real_len, and only if that fails and calculated_len is
  * different is a second attempt made with calculated_len.
  *
  * Returns CL_SUCCESS if either attempt worked, otherwise the status of the
  * last flatedecode() call (e.g. CL_EZIP)
  */
 static int
 try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx)
 {
 	int status;

 	status = flatedecode(buf, real_len, fout, ctx);
 	if(status != CL_SUCCESS) {
 		if(real_len == calculated_len) {
 			/* both lengths agree, so there is nothing else to try */
 			cli_warnmsg("Bad compression in flate stream\n");
 		} else {
 			status = flatedecode(buf, calculated_len, fout, ctx);
 			if(status != CL_SUCCESS)
 				/* i.e. the PDF file is broken :-( */
 				cli_warnmsg("cli_pdf: Bad compressed block length in flate stream\n");
 		}
 	}

 	return status;
 }

 static int
 flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx)
 {
 	int zstat;
 	off_t nbytes;
 	z_stream stream;
 	unsigned char output[BUFSIZ];
 #ifdef	SAVE_TMP
 	char tmpfilename[16];
 	int tmpfd;
 #endif

 	cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);

 	if(len == 0) {
 		cli_warnmsg("cli_pdf: flatedecode len == 0\n");
 		return CL_CLEAN;
 	}

 #ifdef	SAVE_TMP
 	/*
 	 * Copy the embedded area for debugging, so that if it falls over
 	 * we have a copy of the offending data. This is debugging code
 	 * that you shouldn't of course install in a live environment. I am
 	 * not interested in hearing about security issues with this section
 	 * of the parser.
 	 */
 	strcpy(tmpfilename, "/tmp/pdfXXXXXX");
 	tmpfd = mkstemp(tmpfilename);
 	if(tmpfd < 0) {
 		perror(tmpfilename);
 		cli_errmsg("Can't make debugging file\n");
 	} else {
 		FILE *tmpfp = fdopen(tmpfd, "w");

 		if(tmpfp) {
 			fwrite(buf, sizeof(char), len, tmpfp);
 			fclose(tmpfp);
 			cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
 				tmpfilename);
 		} else
 			cli_errmsg("cli_pdf: can't fdopen debugging file\n");
 	}
 #endif
 	stream.zalloc = (alloc_func)Z_NULL;
 	stream.zfree = (free_func)Z_NULL;
 	stream.opaque = (void *)NULL;
 	stream.next_in = (Bytef *)buf;
 	stream.avail_in = len;
 	stream.next_out = output;
 	stream.avail_out = sizeof(output);

 	zstat = inflateInit(&stream);
 	if(zstat != Z_OK) {
 		cli_warnmsg("cli_pdf: inflateInit failed");
 		return CL_EZIP;
 	}

 	nbytes = 0;

 	while(stream.avail_in) {
 		zstat = inflate(&stream, Z_NO_FLUSH);	/* zlib */
 		switch(zstat) {
 			case Z_OK:
 				if(stream.avail_out == 0) {

 					nbytes += cli_writen(fout, output, sizeof(output));

 					if(ctx->limits &&
 					   ctx->limits->maxfilesize &&
 					   (nbytes > (off_t) ctx->limits->maxfilesize)) {
 						cli_dbgmsg("cli_pdf: flatedecode size exceeded (%lu)\n",
 							(unsigned long)nbytes);
 						inflateEnd(&stream);
 						if(BLOCKMAX) {
 							*ctx->virname = "PDF.ExceededFileSize";
 							return CL_VIRUS;
 						}
 						return CL_EZIP;
 					}
 					stream.next_out = output;
 					stream.avail_out = sizeof(output);
 				}
 				continue;
 			case Z_STREAM_END:
 				break;
 			default:
 				if(stream.msg)
 					cli_dbgmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
 						(unsigned long)nbytes,
 						stream.msg);
 				else
 					cli_dbgmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
 						(unsigned long)nbytes, zstat);
 				inflateEnd(&stream);
 				return (zstat == Z_OK) ? CL_SUCCESS : CL_EZIP;
 		}
 		break;
 	}

 	if(stream.avail_out != sizeof(output))
 		if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0)
 			return CL_EIO;

 	/*
 	 * On BSD systems total_in and total_out are "long long", so these
 	 * numbers could (in theory) get truncated in the debug statement
 	 */
 	cli_dbgmsg("cli_pdf: flatedecode in=%lu out=%lu ratio %lu (max %u)\n",
 		(unsigned long)stream.total_in, (unsigned long)stream.total_out,
 		(unsigned long)(stream.total_out / stream.total_in),
 		ctx->limits ? ctx->limits->maxratio : 0);

 	if(ctx->limits &&
 	   ctx->limits->maxratio &&
 	   ((stream.total_out / stream.total_in) > ctx->limits->maxratio)) {
 		cli_dbgmsg("cli_pdf: flatedecode Max ratio reached\n");
 		inflateEnd(&stream);
 		if(BLOCKMAX) {
 			*ctx->virname = "Oversized.PDF";
 			return CL_VIRUS;
 		}
 		return CL_EZIP;
 	}

 #ifdef	SAVE_TMP
 	unlink(tmpfilename);
 #endif
 	return inflateEnd(&stream) == Z_OK ? CL_SUCCESS : CL_EZIP;
 }

 /*
  * ascii85 inflation: decode the len bytes at buf into output.
  *
  * Characters '!' to 'u' are base 85 digits, five of which make four bytes,
  * 'z' stands for four zero bytes, white space is skipped and "~>" ends the
  * data. A final group of 2 to 4 digits gives 1 to 3 bytes. Digits left over
  * when the input ends without "~>" are dropped.
  *
  * output must have room for 4 bytes per input byte (the worst case, 'z').
  *
  * Returns the number of bytes stored in output, -1 for error
  *
  * See http://www.piclist.com/techref/method/encode.htm (look for base85)
  */
 static int
 ascii85decode(const char *buf, off_t len, unsigned char *output)
 {
 	const char *ptr;
 	uint32_t sum = 0;
 	int quintet = 0;
 	int ret = 0;

 	if(cli_pmemstr(buf, len, "~>", 2) == NULL)
 		cli_warnmsg("ascii85decode: no EOF marker found\n");

 	ptr = buf;

 	cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

 	while(len > 0) {
 		/*
 		 * Go through unsigned char so that a 0xFF byte can't be
 		 * mistaken for EOF and isspace() never sees a negative value
 		 */
 		int byte = (unsigned char)*ptr++;

 		len--;

 		/* only look at the next byte if there is one */
 		if((byte == '~') && (len > 0) && (*ptr == '>'))
 			byte = EOF;

 		if(byte >= '!' && byte <= 'u') {
 			sum = (sum * 85) + ((uint32_t)byte - '!');
 			if(++quintet == 5) {
 				*output++ = (unsigned char)(sum >> 24);
 				*output++ = (unsigned char)((sum >> 16) & 0xFF);
 				*output++ = (unsigned char)((sum >> 8) & 0xFF);
 				*output++ = (unsigned char)(sum & 0xFF);
 				ret += 4;
 				quintet = 0;
 				sum = 0;
 			}
 		} else if(byte == 'z') {
 			if(quintet) {
 				cli_warnmsg("ascii85decode: unexpected 'z'\n");
 				return -1;
 			}
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			*output++ = '\0';
 			ret += 4;
 		} else if(byte == EOF) {
 			cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
 			if(quintet) {
 				int i;

 				if(quintet == 1) {
 					cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n");
 					return -1;
 				}
 				/* pad the short group out to five digits */
 				for(i = quintet; i < 5; i++)
 					sum *= 85;

 				sum += (0xFFFFFF >> ((quintet - 2) * 8));
 				/* n digits give n - 1 bytes, count what is stored */
 				ret += quintet - 1;
 				for(i = 0; i < quintet - 1; i++)
 					*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
 			}
 			break;
 		} else if(!isspace(byte)) {
 			cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
 				byte & 0xFF, (unsigned long)len);
 			return -1;
 		}
 	}
 	return ret;
 }

 /*
  * Find the start of the next line
  *
  * Skips what is left of the current line, then the run of \r and \n
  * characters that ends it. Only the len bytes at ptr are looked at.
  *
  * Returns the first byte after the line terminator(s), or NULL if len is 0
  * or the data runs out before such a byte is found
  */
 static const char *
 pdf_nextlinestart(const char *ptr, size_t len)
 {
 	/* nothing to look at, and --len below must not wrap */
 	if(len == 0)
 		return NULL;

 	/*
 	 * The characters are compared directly: strchr("\r\n", c) also
 	 * matches c == '\0', which made NUL bytes count as end of line
 	 */
 	while((*ptr != '\r') && (*ptr != '\n')) {
 		if(--len == 0)
 			return NULL;
 		ptr++;
 	}
 	while((*ptr == '\r') || (*ptr == '\n')) {
 		if(--len == 0)
 			return NULL;
 		ptr++;
 	}
 	return ptr;
 }

 /*
  * Return the start of the next PDF object within the len bytes at ptr,
  * or NULL if there isn't one.
  * This assumes that we're not in a stream.
  *
  * ptr is taken to be inside a token: that token is stepped over up to
  * the first separator. Comments and line ends are skipped a whole line at
  * a time, blanks and the '[' and '<' openers one byte at a time. A '/'
  * (name object) is returned as soon as it is met.
  */
 static const char *
 pdf_nextobject(const char *ptr, size_t len)
 {
 	const char *next;
 	int in_token = 1;	/* still inside the token we started in */

 	while(len != 0) {
 		const char c = *ptr;

 		if((c == '\n') || (c == '\r') || (c == '%')) {
 			/* end of line or comment: carry on at the next line */
 			next = pdf_nextlinestart(ptr, len);
 			if(next == NULL)
 				return NULL;
 			len -= (size_t)(next - ptr);
 			ptr = next;
 			in_token = 0;
 		} else if((c == ' ') || (c == '\t') || (c == '\v') ||
 			  (c == '\f') || (c == '[') || (c == '<')) {
 			/* white space, start of an array or of a dictionary */
 			in_token = 0;
 			ptr++;
 			len--;
 		} else if((c == '/') || !in_token) {
 			/* TODO: parse and return object type */
 			return ptr;
 		} else {
 			ptr++;
 			len--;
 		}
 	}
 	return NULL;
 }

 /*
  * like cli_memstr - but returns the location of the match
  *
  * Looks for the ns bytes at needle within the hs bytes at haystack and
  * returns a pointer to the first occurrence, or NULL if there is none.
  * haystack itself is returned when both pointers are the same or ns is 0.
  *
  * FIXME: need a case insensitive version
  */
 static const char *
 cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
 {
 	const char *cursor, *hit;
 	size_t remaining;

 	if(haystack == needle)
 		return haystack;

 	if(ns > hs)
 		return NULL;

 	if(!memcmp(haystack, needle, ns))
 		return haystack;

 	cursor = haystack;
 	remaining = hs;

 	for(;;) {
 		/* jump to the next candidate: needle's first byte */
 		hit = memchr(cursor, needle[0], remaining);
 		if(hit == NULL)
 			return NULL;

 		remaining -= (size_t)(hit - cursor);
 		if(remaining < ns)
 			return NULL;	/* too little left for a match */

 		if(memcmp(hit, needle, ns) == 0)
 			return hit;

 		/* no match here, resume just after this candidate */
 		cursor = hit + 1;
 		remaining--;
 	}
 }
 #else	/*!HAVE_MMAP*/

 #include "clamav.h"
 #include "others.h"
 #include "pdf.h"

 /*
  * Version used when HAVE_MMAP is not defined: the file is not decoded,
  * a warning says so and CL_CLEAN is returned. None of the arguments is
  * used.
  */
 int
 cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
 {
 	const int verdict = CL_CLEAN;

 	cli_warnmsg("File not decoded - PDF decoding needs mmap() (for now)\n");

 	return verdict;
 }
 #endif
	/*
	* Copyright (C) 2005-2007 Nigel Horne <njh@bandsman.co.uk>
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	*
	* TODO: Embedded fonts
	* TODO: Predictor image handling
	*/
	static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";

	#if HAVE_CONFIG_H
	#include "clamav-config.h"
	#endif

	#ifdef HAVE_MMAP
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <ctype.h>
	#include <string.h>
	#include <fcntl.h>
	#include <stdlib.h>
	#include <errno.h>
	#ifdef HAVE_LIMITS_H
	#include <limits.h>
	#endif
	#ifdef HAVE_UNISTD_H
	#include <unistd.h>
	#endif

	#ifdef HAVE_SYS_MMAN_H
	#include <sys/mman.h>
	#endif

	#ifdef HAVE_ZLIB_H
	#include <zlib.h>
	#endif

	#ifdef C_WINDOWS
	#include <io.h>
	#endif

	#include "clamav.h"
	#include "others.h"
	#include "mbox.h"
	#include "pdf.h"

	#ifdef CL_DEBUG
	/*#define SAVE_TMP /* Save the file being worked on in tmp */
	#endif

	/*
	 * Forward declarations of the file-local helpers. The pointer
	 * declarators are part of every signature: buffers, the scan context
	 * and the returned locations are all passed by address.
	 */
	static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx);
	static int flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx);
	static int ascii85decode(const char *buf, off_t len, unsigned char *output);
	static const char *pdf_nextlinestart(const char *ptr, size_t len);
	static const char *pdf_nextobject(const char *ptr, size_t len);
	static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);

	/*
	* TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
	*/
	int
	cli_pdf(const char dir, int desc, const cli_ctx ctx)
	{
	off_t size; /* total number of bytes in the file */
	off_t bytesleft, trailerlength;
	char buf, alloced; /* start of memory mapped area */
	const char p, q, *trailerstart;
	const char xrefstart; / cross reference table */
	const struct cl_limits *limits;
	/size_t xreflength;/
	table_t *md5table;
	int printed_predictor_message, printed_embedded_font_message, rc;
	unsigned int files;
	struct stat statb;

	cli_dbgmsg("in cli_pdf(%s)\n", dir);

	if(fstat(desc, &statb) < 0)
	return CL_EOPEN;

	size = statb.st_size;

	if(size == 0)
	return CL_CLEAN;

	if(size <= 7) /* doesn't even include the file header */
	return CL_EFORMAT;

	p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
	if(buf == MAP_FAILED)
	return CL_EMEM;

	alloced = cli_malloc(size);
	if(alloced) {
	/*
	* FIXME: now I have this, there's no need for the lack of
	* support on systems without mmap, e.g. cygwin
	*/
	memcpy(alloced, buf, size);
	munmap(buf, size);
	p = alloced;
	}

	cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);

	/* Lines are terminated by \r, \n or both */

	/* File Header */
	if(memcmp(p, "%PDF-1.", 7) != 0) {
	if(alloced)
	free(alloced);
	else
	munmap(buf, size);
	return CL_EFORMAT;
	}

	#if 0
	q = pdf_nextlinestart(&p[6], size - 6);
	if(q == NULL) {
	if(alloced)
	free(alloced);
	else
	munmap(buf, size);
	return CL_EFORMAT;
	}
	bytesleft = size - (long)(q - p);
	p = q;
	#else
	p = &p[6];
	bytesleft = size - 6;
	#endif

	/* Find the file trailer */
	for(q = &p[bytesleft - 6]; q > p; --q)
	if(memcmp(q, "%%EOF", 5) == 0)
	break;

	if(q <= p) {
	if(alloced)
	free(alloced);
	else
	munmap(buf, size);
	return CL_EFORMAT;
	}

	for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
	if(memcmp(trailerstart, "trailer", 7) == 0)
	break;

	/*
	* q points to the end of the trailer section
	*/
	trailerlength = (long)(q - trailerstart);
	if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
	/*
	* This tends to mean that the file is, in effect, read-only
	*/
	if(alloced)
	free(alloced);
	else
	munmap(buf, size);
	cli_warnmsg("Encrypted PDF files not yet supported\n");
	return CL_EFORMAT;
	}

	/*
	* not true, since edits may put data after the trailer
	bytesleft -= trailerlength;
	*/

	/*
	* FIXME: Handle more than one xref section in the xref table
	*/
	for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
	if(memcmp(xrefstart, "xref", 4) == 0)
	/*
	* Make sure it's the start of the line, not a startxref
	* token
	*/
	if((xrefstart[-1] == '\n') \|\| (xrefstart[-1] == '\r'))
	break;

	if(xrefstart == p) {
	if(alloced)
	free(alloced);
	else
	munmap(buf, size);
	return CL_EFORMAT;
	}

	printed_predictor_message = printed_embedded_font_message = 0;

	md5table = tableCreate();
	/*
	* not true, since edits may put data after the trailer
	xreflength = (size_t)(trailerstart - xrefstart);
	bytesleft -= xreflength;
	*/

	rc = CL_CLEAN;
	files = 0;
	limits = ctx->limits;

	/*
	* The body section consists of a sequence of indirect objects
	*/
	while((p < xrefstart) && (rc == CL_CLEAN) &&
	((q = pdf_nextobject(p, bytesleft)) != NULL)) {
	int is_ascii85decode, is_flatedecode, fout, len, has_cr;
	/int object_number, generation_number;/
	const char objstart, objend, streamstart, streamend;
	char *md5digest;
	unsigned long length, objlen, real_streamlen, calculated_streamlen;
	int is_embedded_font, predictor;
	char fullname[NAME_MAX + 1];

	if(q == xrefstart)
	break;
	if(memcmp(q, "xref", 4) == 0)
	break;

	/object_number = atoi(q);/
	bytesleft -= (off_t)(q - p);
	p = q;

	if(memcmp(q, "endobj", 6) == 0)
	continue;
	if(!isdigit(*q)) {
	cli_warnmsg("cli_pdf: Object number missing\n");
	rc = CL_EFORMAT;
	break;
	}
	q = pdf_nextobject(p, bytesleft);
	if((q == NULL) \|\| !isdigit(*q)) {
	cli_warnmsg("cli_pdf: Generation number missing\n");
	rc = CL_EFORMAT;
	break;
	}
	/generation_number = atoi(q);/
	bytesleft -= (off_t)(q - p);
	p = q;

	q = pdf_nextobject(p, bytesleft);
	if((q == NULL) \|\| (memcmp(q, "obj", 3) != 0)) {
	cli_warnmsg("Indirect object missing \"obj\"\n");
	rc = CL_EFORMAT;
	break;
	}

	bytesleft -= (off_t)((q - p) + 3);
	objstart = p = &q[3];
	objend = cli_pmemstr(p, bytesleft, "endobj", 6);
	if(objend == NULL) {
	cli_dbgmsg("No matching endobj\n");
	break;
	}
	bytesleft -= (off_t)((objend - p) + 6);
	p = &objend[6];
	objlen = (unsigned long)(objend - objstart);

	/* Is this object a stream? */
	streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
	if(streamstart == NULL)
	continue;

	is_embedded_font = length = is_ascii85decode =
	is_flatedecode = 0;
	predictor = 1;

	/*
	* TODO: handle F and FFilter?
	*/
	q = objstart;
	while(q < streamstart) {
	if(q == '/') { / name object */
	/cli_dbgmsg("Name object %8.8s\n", q+1, q+1);/
	if(strncmp(++q, "Length ", 7) == 0) {
	q += 7;
	length = atoi(q);
	while(isdigit(*q))
	q++;
	/*
	* Note: incremental updates are not
	* supported
	*/
	if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
	const char *r;
	char b[14];

	q += 4;
	cli_dbgmsg("Length is in indirect obj %ld\n",
	length);
	snprintf(b, sizeof(b),
	"\n%ld 0 obj", length);
	length = (unsigned long)strlen(b);
	r = cli_pmemstr(alloced ? alloced : buf,
	size, b, length);
	if(r == NULL) {
	b[0] = '\r';
	r = cli_pmemstr(alloced ? alloced : buf,
	size, b, length);
	}
	if(r) {
	r += length - 1;
	r = pdf_nextobject(r, bytesleft - (r - q));
	if(r) {
	length = atoi(r);
	while(isdigit(*r))
	r++;
	cli_dbgmsg("length in '%s' %ld\n",
	&b[1],
	length);
	}
	} else
	cli_warnmsg("Couldn't find '%s'\n",
	&b[1]);
	}
	q--;
	} else if(strncmp(q, "Length2 ", 8) == 0)
	is_embedded_font = 1;
	else if(strncmp(q, "Predictor ", 10) == 0) {
	q += 10;
	predictor = atoi(q);
	while(isdigit(*q))
	q++;
	q--;
	} else if(strncmp(q, "FlateDecode", 11) == 0) {
	is_flatedecode = 1;
	q += 11;
	} else if(strncmp(q, "ASCII85Decode", 13) == 0) {
	is_ascii85decode = 1;
	q += 13;
	}
	}
	q = pdf_nextobject(q, (size_t)(streamstart - q));
	if(q == NULL)
	break;
	}

	if(is_embedded_font) {
	/*
	* Need some documentation, the only I can find a
	* reference to is not free, if some kind soul wishes
	* to donate a copy, please contact me!
	* (http://safari.adobepress.com/0321304748)
	*/
	if(!printed_embedded_font_message) {
	cli_dbgmsg("Embedded fonts not yet supported\n");
	printed_embedded_font_message = 1;
	}
	continue;
	}
	if(predictor > 1) {
	/*
	* Needs some thought
	*/
	if(!printed_predictor_message) {
	cli_dbgmsg("Predictor %d not honoured for embedded image\n",
	predictor);
	printed_predictor_message = 1;
	}
	continue;
	}

	/* objend points to the end of the object (start of "endobj") */
	streamstart += 6; /* go past the word "stream" */
	len = (int)(objend - streamstart);
	q = pdf_nextlinestart(streamstart, len);
	if(q == NULL)
	break;
	len -= (int)(q - streamstart);
	streamstart = q;
	streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
	if(streamend == NULL) {
	streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
	if(streamend == NULL) {
	cli_dbgmsg("No endstream\n");
	break;
	}
	has_cr = 1;
	} else
	has_cr = 0;
	snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir);
	#if defined(C_LINUX) \|\| defined(C_BSD) \|\| defined(HAVE_MKSTEMP) \|\| defined(C_SOLARIS) \|\| defined(C_CYGWIN)
	fout = mkstemp(fullname);
	#elif defined(C_WINDOWS)
	if(_mktemp(fullname) == NULL) {
	/* mktemp only allows 26 files */
	char *name = cli_gentemp(dir);
	if(name == NULL)
	fout = -1;
	else {
	strcpy(fullname, name);
	free(name);
	fout = open(fullname,
	O_WRONLY\|O_CREAT\|O_EXCL\|O_TRUNC\|O_BINARY, 0600);
	}
	} else
	fout = open(fullname, O_WRONLY\|O_CREAT\|O_EXCL\|O_TRUNC\|O_BINARY, 0600);
	#else
	mktemp(fullname);
	fout = open(fullname, O_WRONLY\|O_CREAT\|O_EXCL\|O_TRUNC\|O_BINARY, 0600);
	#endif

	if(fout < 0) {
	cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno));
	rc = CL_ETMPFILE;
	break;
	}

	/*
	* Calculate the length ourself, the Length parameter is often
	* wrong
	*/
	if((--streamend != '\n') && (streamend != '\r'))
	streamend++;
	else if(has_cr && (*--streamend != '\r'))
	streamend++;

	if(streamend <= streamstart) {
	close(fout);
	cli_dbgmsg("Empty stream\n");
	unlink(fullname);
	continue;
	}
	calculated_streamlen = (int)(streamend - streamstart);
	real_streamlen = length;

	if(calculated_streamlen != real_streamlen)
	cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");

	cli_dbgmsg("length %ld, calculated_streamlen %ld isFlate %d isASCII85 %d\n",
	length, calculated_streamlen,
	is_flatedecode, is_ascii85decode);

	#if 0
	/* FIXME: this isn't right... */
	if(length)
	/streamlen = (is_flatedecode) ? length : MIN(length, streamlen);/
	streamlen = MIN(length, streamlen);
	#endif

	if(is_ascii85decode) {
	unsigned char tmpbuf = cli_malloc(calculated_streamlen 5);
	int ret;

	if(tmpbuf == NULL) {
	close(fout);
	unlink(fullname);
	rc = CL_EMEM;
	continue;
	}

	ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);

	if(ret == -1) {
	free(tmpbuf);
	close(fout);
	unlink(fullname);
	rc = CL_EFORMAT;
	continue;
	}
	if(ret) {
	unsigned char *t;

	real_streamlen = ret;
	/* free unused trailing bytes */
	t = (unsigned char *)cli_realloc(tmpbuf,
	calculated_streamlen);
	if(t == NULL) {
	free(tmpbuf);
	close(fout);
	unlink(fullname);
	rc = CL_EMEM;
	continue;
	}
	tmpbuf = t;
	/*
	* Note that it will probably be both
	* ascii85encoded and flateencoded
	*/
	if(is_flatedecode)
	rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
	else
	cli_writen(fout, (const char *)streamstart, real_streamlen);
	}
	free(tmpbuf);
	} else if(is_flatedecode)
	rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);

	else {
	cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
	(unsigned long)real_streamlen);
	cli_writen(fout, (const char *)streamstart, real_streamlen);
	}

	close(fout);
	md5digest = cli_md5file(fullname);
	if(tableFind(md5table, md5digest) >= 0) {
	cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
	unlink(fullname);
	} else
	tableInsert(md5table, md5digest, 1);
	free(md5digest);
	cli_dbgmsg("cli_pdf: extracted file %d to %s\n", ++files,
	fullname);
	if(limits && limits->maxfiles && (files >= limits->maxfiles)) {
	/* Bug 698 */
	cli_dbgmsg("cli_pdf: number of files exceeded %u\n", limits->maxfiles);
	rc = CL_EMAXFILES;
	}
	}

	if(alloced)
	free(alloced);
	else
	munmap(buf, size);

	tableDestroy(md5table);

	cli_dbgmsg("cli_pdf: returning %d\n", rc);
	return rc;
	}

	/*
	 * Flate inflation with a fallback length - returns a ClamAV status,
	 * e.g. CL_SUCCESS, CL_EZIP.
	 *
	 * buf            start of the compressed data
	 * real_len       length that is tried first
	 * calculated_len length that is tried if the first attempt fails and
	 *                the two lengths differ
	 * fout           descriptor the inflated data is written to
	 * ctx            scan context, passed through to flatedecode()
	 *
	 * NOTE(review): this function does not rewind fout between the two
	 * attempts, so anything the failed first attempt wrote stays in the
	 * file - confirm that this is acceptable to the caller.
	 */
	static int
	try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx)
	{
		int ret = flatedecode(buf, real_len, fout, ctx);

		if(ret == CL_SUCCESS)
			return CL_SUCCESS;

		if(real_len == calculated_len) {
			/*
			 * Both lengths agree, so retrying would only repeat the
			 * same failure: nothing more we can do to inflate
			 */
			cli_warnmsg("Bad compression in flate stream\n");
			return ret;
		}

		ret = flatedecode(buf, calculated_len, fout, ctx);
		if(ret == CL_SUCCESS)
			return CL_SUCCESS;

		/* i.e. the PDF file is broken :-( */
		cli_warnmsg("cli_pdf: Bad compressed block length in flate stream\n");

		return ret;
	}

	static int
	flatedecode(unsigned char buf, off_t len, int fout, const cli_ctx ctx)
	{
	int zstat;
	off_t nbytes;
	z_stream stream;
	unsigned char output[BUFSIZ];
	#ifdef SAVE_TMP
	char tmpfilename[16];
	int tmpfd;
	#endif

	cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);

	if(len == 0) {
	cli_warnmsg("cli_pdf: flatedecode len == 0\n");
	return CL_CLEAN;
	}

	#ifdef SAVE_TMP
	/*
	* Copy the embedded area for debugging, so that if it falls over
	* we have a copy of the offending data. This is debugging code
	* that you shouldn't of course install in a live environment. I am
	* not interested in hearing about security issues with this section
	* of the parser.
	*/
	strcpy(tmpfilename, "/tmp/pdfXXXXXX");
	tmpfd = mkstemp(tmpfilename);
	if(tmpfd < 0) {
	perror(tmpfilename);
	cli_errmsg("Can't make debugging file\n");
	} else {
	FILE *tmpfp = fdopen(tmpfd, "w");

	if(tmpfp) {
	fwrite(buf, sizeof(char), len, tmpfp);
	fclose(tmpfp);
	cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
	tmpfilename);
	} else
	cli_errmsg("cli_pdf: can't fdopen debugging file\n");
	}
	#endif
	stream.zalloc = (alloc_func)Z_NULL;
	stream.zfree = (free_func)Z_NULL;
	stream.opaque = (void *)NULL;
	stream.next_in = (Bytef *)buf;
	stream.avail_in = len;
	stream.next_out = output;
	stream.avail_out = sizeof(output);

	zstat = inflateInit(&stream);
	if(zstat != Z_OK) {
	cli_warnmsg("cli_pdf: inflateInit failed");
	return CL_EZIP;
	}

	nbytes = 0;

	while(stream.avail_in) {
	zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */
	switch(zstat) {
	case Z_OK:
	if(stream.avail_out == 0) {

	nbytes += cli_writen(fout, output, sizeof(output));

	if(ctx->limits &&
	ctx->limits->maxfilesize &&
	(nbytes > (off_t) ctx->limits->maxfilesize)) {
	cli_dbgmsg("cli_pdf: flatedecode size exceeded (%lu)\n",
	(unsigned long)nbytes);
	inflateEnd(&stream);
	if(BLOCKMAX) {
	*ctx->virname = "PDF.ExceededFileSize";
	return CL_VIRUS;
	}
	return CL_EZIP;
	}
	stream.next_out = output;
	stream.avail_out = sizeof(output);
	}
	continue;
	case Z_STREAM_END:
	break;
	default:
	if(stream.msg)
	cli_dbgmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
	(unsigned long)nbytes,
	stream.msg);
	else
	cli_dbgmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
	(unsigned long)nbytes, zstat);
	inflateEnd(&stream);
	return (zstat == Z_OK) ? CL_SUCCESS : CL_EZIP;
	}
	break;
	}

	if(stream.avail_out != sizeof(output))
	if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0)
	return CL_EIO;

	/*
	* On BSD systems total_in and total_out are "long long", so these
	* numbers could (in theory) get truncated in the debug statement
	*/
	cli_dbgmsg("cli_pdf: flatedecode in=%lu out=%lu ratio %lu (max %u)\n",
	(unsigned long)stream.total_in, (unsigned long)stream.total_out,
	(unsigned long)(stream.total_out / stream.total_in),
	ctx->limits ? ctx->limits->maxratio : 0);

	if(ctx->limits &&
	ctx->limits->maxratio &&
	((stream.total_out / stream.total_in) > ctx->limits->maxratio)) {
	cli_dbgmsg("cli_pdf: flatedecode Max ratio reached\n");
	inflateEnd(&stream);
	if(BLOCKMAX) {
	*ctx->virname = "Oversized.PDF";
	return CL_VIRUS;
	}
	return CL_EZIP;
	}

	#ifdef SAVE_TMP
	unlink(tmpfilename);
	#endif
	return inflateEnd(&stream) == Z_OK ? CL_SUCCESS : CL_EZIP;
	}

	/*
	 * ascii85 inflation, returns number of bytes in output, -1 for error
	 *
	 * buf    the ascii85 encoded text
	 * len    number of bytes available in buf
	 * output where the decoded bytes are stored; no bounds checking is done
	 *        here, so the caller must supply a buffer that is large enough
	 *        (each input character produces at most 4 output bytes, the
	 *        'z' shorthand being the worst case)
	 *
	 * Decoding stops at the "~>" end of data marker, or when len bytes have
	 * been consumed. White space is skipped, any other character outside of
	 * the ascii85 alphabet is an error.
	 *
	 * See http://www.piclist.com/techref/method/encode.htm (look for base85)
	 */
	static int
	ascii85decode(const char *buf, off_t len, unsigned char *output)
	{
		const char *ptr = buf;
		uint32_t sum = 0;
		int quintet = 0;
		int ret = 0;

		if(cli_pmemstr(buf, len, "~>", 2) == NULL)
			cli_warnmsg("ascii85decode: no EOF marker found\n");

		cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);

		while(len > 0) {
			/*
			 * Read the byte as unsigned so that 0xFF can't be mistaken
			 * for EOF on platforms where char is signed
			 */
			int byte = (unsigned char)*ptr++;

			len--;

			/* Only look ahead for '>' if there is a byte left to look at */
			if((byte == '~') && (len > 0) && (*ptr == '>'))
				byte = EOF;

			if(byte >= '!' && byte <= 'u') {
				sum = (sum * 85) + ((uint32_t)byte - '!');
				if(++quintet == 5) {
					*output++ = (unsigned char)(sum >> 24);
					*output++ = (unsigned char)((sum >> 16) & 0xFF);
					*output++ = (unsigned char)((sum >> 8) & 0xFF);
					*output++ = (unsigned char)(sum & 0xFF);
					ret += 4;
					quintet = 0;
					sum = 0;
				}
			} else if(byte == 'z') {
				/* 'z' is shorthand for four zero bytes */
				if(quintet) {
					cli_warnmsg("ascii85decode: unexpected 'z'\n");
					return -1;
				}
				*output++ = '\0';
				*output++ = '\0';
				*output++ = '\0';
				*output++ = '\0';
				ret += 4;
			} else if(byte == EOF) {
				cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
				if(quintet) {
					int i;

					if(quintet == 1) {
						cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n");
						return -1;
					}
					/* Pad the short final group up to five digits */
					for(i = quintet; i < 5; i++)
						sum *= 85;

					sum += (0xFFFFFF >> ((quintet - 2) * 8));

					/*
					 * A final group of n characters decodes to n - 1
					 * bytes, so count only what is actually stored
					 */
					ret += quintet - 1;
					for(i = 0; i < quintet - 1; i++)
						*output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
					quintet = 0;
				}
				len = 0;
				break;
			} else if(!isspace(byte)) {
				cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
					byte & 0xFF, (unsigned long)len);
				return -1;
			}
		}
		return ret;
	}

	/*
	 * Find the start of the next line
	 *
	 * Looks at no more than len bytes starting at ptr: skips up to the first
	 * line terminator (CR or LF) and then past the run of terminators that
	 * follows it.
	 *
	 * Returns a pointer to the first byte after that run, or NULL if the
	 * len bytes are used up first (including when len is 0).
	 *
	 * Note that strchr() also matches the NUL that ends "\r\n", so a NUL
	 * byte in the data is handled like a line terminator.
	 */
	static const char *
	pdf_nextlinestart(const char *ptr, size_t len)
	{
		/*
		 * Nothing to look at: without this check *ptr would be read and
		 * --len would wrap round to a huge value
		 */
		if(len == 0)
			return NULL;

		/* Skip the rest of the current line */
		while(strchr("\r\n", *ptr) == NULL) {
			if(--len == 0)
				return NULL;
			ptr++;
		}
		/* Skip the line terminator(s) */
		while(strchr("\r\n", *ptr) != NULL) {
			if(--len == 0)
				return NULL;
			ptr++;
		}
		return ptr;
	}

	/*
	 * Return the start of the next PDF object, looking at no more than len
	 * bytes starting at ptr, or NULL if there isn't one.
	 * This assumes that we're not in a stream.
	 *
	 * The scan starts inside the current token: ordinary characters are
	 * skipped until a delimiter has been seen, after which the first
	 * ordinary character is the answer. A '/' (start of a name object) is
	 * always returned at once.
	 */
	static const char *
	pdf_nextobject(const char *ptr, size_t len)
	{
		int seen_delimiter = 0;

		while(len != 0) {
			const char c = *ptr;

			/* Start of a name object */
			if(c == '/')
				return ptr;

			if((c == '\n') || (c == '\r') || (c == '%')) {
				/* End of line or a comment: carry on from the next line */
				const char *next = pdf_nextlinestart(ptr, len);

				if(next == NULL)
					return NULL;
				len -= (size_t)(next - ptr);
				ptr = next;
				seen_delimiter = 1;
			} else if((c == ' ') || (c == '\t') || (c == '\v') ||
				  (c == '\f') ||
				  (c == '[') ||		/* Start of an array object */
				  (c == '<')) {		/* Start of a dictionary object */
				seen_delimiter = 1;
				ptr++;
				len--;
			} else {
				if(seen_delimiter)
					/* TODO: parse and return object type */
					return ptr;
				/* Still inside the token we started in */
				ptr++;
				len--;
			}
		}
		return NULL;
	}

	/*
	 * like cli_memstr - but returns the location of the match
	 *
	 * Looks for the ns bytes at needle within the hs bytes at haystack.
	 * Returns a pointer to the first match, or NULL if there is none.
	 * An empty needle (ns == 0) matches at the start of haystack.
	 *
	 * FIXME: need a case insensitive version
	 */
	static const char *
	cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
	{
		const char *hay, *pt;
		size_t n;

		if(haystack == needle)
			return haystack;

		if(hs < ns)
			return NULL;

		if(memcmp(haystack, needle, ns) == 0)
			return haystack;

		hay = haystack;
		n = hs;	/* number of bytes from hay to the end of haystack */

		/* Try each place where the first byte of needle is found */
		while((pt = memchr(hay, needle[0], n)) != NULL) {
			n -= (size_t)(pt - hay);
			if(n < ns)
				/* Not enough bytes left for needle to fit */
				break;

			if(memcmp(pt, needle, ns) == 0)
				return pt;

			/* No match here, carry on from the next byte */
			hay = pt + 1;
			n--;
		}

		return NULL;
	}
	#else	/*!HAVE_MMAP*/

	#include "clamav.h"
	#include "others.h"
	#include "pdf.h"

	/*
	 * Version of cli_pdf() that is built when HAVE_MMAP is not defined:
	 * no decoding is done, a warning is issued and the file is reported
	 * as clean. None of the arguments are used.
	 */
	int
	cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
	{
		cli_warnmsg("File not decoded - PDF decoding needs mmap() (for now)\n");
		return CL_CLEAN;
	}
	#endif