blob: bc909c4ead17471e9e4c158fe6aa6a5ccd1b24ca [file] [log] [blame]
/*@z45.c:External Sort:SortFile()@********************************************/
/* */
/* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.24) */
/* COPYRIGHT (C) 1991, 2000 Jeffrey H. Kingston */
/* */
/* Jeffrey H. Kingston (jeff@cs.usyd.edu.au) */
/* Basser Department of Computer Science */
/* The University of Sydney 2006 */
/* AUSTRALIA */
/* */
/* This program is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* the Free Software Foundation; either Version 2, or (at your option) */
/* any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */
/* */
/* FILE: z45.c */
/* MODULE: External Sort */
/* EXTERNS: SortFile() */
/* */
/* This simple sort utility assumes that the source file can all be read */
/* into memory. If not, you get an "out of memory" error message. */
/* */
/*****************************************************************************/
#include "externs.h"
#define BUFF_SIZE 4096 /* size of one memory buffer */
#define LINES_GUESS 2000 /* initial guess of number of lines */
/*****************************************************************************/
/* */
/* LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, *len) */
/* */
/* Read all of the lines of fp into memory and return a null-terminated */
/* array of pointers to these lines, and set *len to the number of lines. */
/* Make sure the lines themselves are null-terminated, also. */
/* */
/* fname is the name of the file being sorted, and is used for error */
/* messages only. */
/* */
/* if first_line is non-null then it is a pointer to a string which is */
/* to become as the first line of the result. This string needs copying. */
/* */
/*****************************************************************************/
LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, int *len)
{
char *buff; /* the current input line buffer */
char *buff_top; /* first spot off end of buffer */
char *bp; /* first free spot in buff */
LINE *lines; /* the array of pointers to lines */
int lines_length; /* the length of the lines array */
LINE *lines_top; /* first spot off end of lines */
LINE *lp; /* first free spot in lines */
char *p, *q;
int ch;
debug1(DEX, D, "ReadLines(-, %s, -)", fname);
/* initialize buff to be empty with size BUFF_SIZE */
buff = malloc(BUFF_SIZE * sizeof(char));
if( buff == NULL )
Error(45, 1, "run out of memory when reading index file %s",
FATAL, no_fpos, fname);
buff_top = buff + BUFF_SIZE;
bp = buff;
/* initialize the lines buffer to be the first line */
lines_length = LINES_GUESS;
lines = malloc(lines_length * sizeof(LINE *));
lines_top = &lines[lines_length];
lp = lines;
/* add first_line to lines buffer if required */
if( first_line != (FULL_CHAR *) null )
{
*lp = malloc((StringLength(first_line) + 1) * sizeof(char));
StringCopy( (char *) *lp, first_line);
lp++;
}
*lp++ = bp;
while( (ch = getc(fp)) != EOF )
{
debug4(DEX, DD, "lines: [%d %d(%d) %d]",
(int) lines, (int) (lp-1), (int) *(lp-1), (int) lines_top -1);
debug3(DEX, DD, " buff: [%d bp %d %d]",
(int) buff, (int) bp, (int) buff_top - 1);
assert( (int) buff >= (int) lines_top ||
(int) buff_top <= (int) lines,
"ReadLines: lines and buff overlap!" );
/* get new buffer and copy current line across if out of buff space */
if( bp == buff_top )
{
debug0(DEX, D, " getting new buff");
buff = malloc(BUFF_SIZE * sizeof(char));
if( buff == NULL )
Error(45, 2, "run out of memory when reading index file %s",
FATAL, no_fpos, fname);
buff_top = buff + BUFF_SIZE;
for( p = buff, q = *(lp-1); q != bp; *p++ = *q++ );
bp = p; *bp = '\0';
debug1(DEX, D, " copied into new buff: %s", buff);
*(lp-1) = buff;
if( bp == buff_top )
Error(45, 3, "line too long when reading index file %s",
FATAL, no_fpos, fname);
}
/* if newline char, end this line and start the next */
if( ch == '\n' )
{
*bp++ = '\0';
debug1(DEX, D, " finished line: %s", *(lp-1));
/* if no room in lines for next line, double its size */
if( lp == lines_top )
{
debug1(DEX, D, " realloc(lines, %d)", 2 * lines_length);
lines = realloc(lines, 2 * lines_length * sizeof(LINE *));
if( lines == NULL )
Error(45, 4, "run out of memory when reading index file %s",
FATAL, no_fpos, fname);
lp = &lines[lines_length];
lines_length = 2 * lines_length;
lines_top = &lines[lines_length];
}
*lp++ = bp;
}
else /* ordinary char with space available, so just add it */
{
*bp++ = ch;
}
}
*len = (lp - lines - 1);
debug1(DEX, D, "ReadLines returning (len = %d)", *len);
return lines;
} /* end ReadLines */
/*****************************************************************************/
/* */
/* WriteLines(FILE *fp, LINE *lines, int len) */
/* */
/* Write array of lines "lines", of length len, to file fp. */
/* */
/*****************************************************************************/
void WriteLines(FILE *fp, LINE *lines, int len)
{ int i;
for( i = 0; i < len; i++ )
{ fputs(lines[i], fp);
fputs("\n", fp);
}
}
/*****************************************************************************/
/* */
/* Line comparison functions (for qsort) */
/* */
/* By Jeff Kingston and Valery Ushakov (uwe). */
/* */
/*****************************************************************************/
static int pstrcmp(const void *a, const void *b) /* !UseCollate */
{
return strcmp (*(char **)a, *(char **)b);
}
static int pstrcollcmp(const void *a, const void *b) /* UseCollate */
{
return strcollcmp (*(char **)a, *(char**)b);
}
/*****************************************************************************/
/* */
/* void SortLines(LINE *lines, int lines_len) */
/* */
/* Sort the given lines. */
/* */
/*****************************************************************************/
void SortLines(LINE *lines, int lines_len)
{
qsort(lines, lines_len, sizeof(LINE), (UseCollate ? pstrcollcmp : pstrcmp));
}
/*****************************************************************************/
/* */
/* void SortFile(char *infile, char *outfile) */
/* */
/* Sort file infile, placing the result on file outfile. */
/* */
/*****************************************************************************/
void SortFile(FULL_CHAR *infile, FULL_CHAR *outfile)
{
LINE *lines;
int lines_len;
FILE *in_fp, *out_fp;
debug2(DEX, D, "SortFile(%s, %s)", infile, outfile);
/* open input file */
in_fp = fopen( (char *) infile, READ_BINARY);
if( in_fp == (FILE *) NULL )
Error(45, 5, "cannot open index file %s for reading",
FATAL, no_fpos, outfile);
/* open output file */
out_fp = fopen( (char *) outfile, WRITE_BINARY);
if( out_fp == (FILE *) NULL )
Error(45, 6, "cannot open index file %s for writing",
FATAL, no_fpos, outfile);
/* read lines, sort them, and write them out again sorted */
lines = ReadLines(in_fp, infile, (FULL_CHAR *) NULL, &lines_len);
SortLines(lines, lines_len);
fclose(in_fp);
WriteLines(out_fp, lines, lines_len);
fclose(out_fp);
}