| """Conversion pipeline templates. | |

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similarly for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similarly for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile is the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
| """ # ' | |
| import re | |
| import os | |
| import tempfile | |
| import string | |
| __all__ = ["Template"] | |
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes how
# the step takes its input, the second how it delivers its output.
FILEIN_FILEOUT = 'ff'  # Must read & write real files
STDIN_FILEOUT = '-f'   # Must write a real file
FILEIN_STDOUT = 'f-'   # Must read a real file
STDIN_STDOUT = '--'    # Normal pipeline element
SOURCE = '.-'          # Must be first, writes stdout
SINK = '-.'            # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in self.steps,
    each a (cmd, kind) tuple, and a debugging flag in self.debugging.
    The steps are turned into a /bin/sh command by makepipeline() each
    time the template is used.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later append/prepend calls on either template
        # do not affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_step(self, where, cmd, kind):
        """Raise TypeError/ValueError unless cmd is a string and kind
        is one of stepkinds.  where names the calling method and is
        only used in the error message."""
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    def _check_placeholders(self, where, cmd, kind):
        """Raise ValueError if kind asks for a file on either side but
        cmd does not mention the matching $IN / $OUT variable."""
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, kind is SOURCE, the template already ends with a
        SINK step, or cmd lacks a $IN/$OUT that kind requires.
        """
        self._check_step('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, kind is SINK, the template already begins with a
        SOURCE step, or cmd lacks a $IN/$OUT that kind requires.
        """
        self._check_step('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is opened directly; otherwise the
        pipeline is started with os.popen() and its output end is
        returned.  Raises ValueError if the last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the
        pipeline is started with os.popen() and its input end is
        returned.  Raises ValueError if the first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile with os.system() and returns its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            # Single parenthesised argument: prints the same text under
            # both the Python 2 print statement and the print function.
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile name the files at the two ends of the pipeline;
    an empty value means nothing is attached at that end.  steps is a
    sequence of (cmd, kind) pairs, where kind is a two-character string
    whose first/second character is 'f' when the command needs a real
    input/output file (passed to it as $IN / $OUT).

    Temporary files are created here, at call time, wherever two
    neighbouring stages cannot be joined with a pipe.  The returned
    script removes them when it finishes, and also installs a trap
    that removes them on signals 1 2 3 13 14 15.
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends.  A stage that needs a real
    # file but has none supplied gets a plain 'cat' put next to it, so the
    # temporary-file pass below provides the file.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files.
    temps = []
    for i in range(1, len(stages)):
        left, right = stages[i - 1], stages[i]
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)  # only the name is needed; the shell reopens it
            temps.append(temp)
            left[-1] = right[0] = temp

    # Attach file names to each command: as $IN/$OUT assignments for
    # 'f' ends, as shell redirections for '-' ends that have a file.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is fed through a
    # pipe; one with an input file starts a new command line.
    pieces = [stages[0][1]]
    for inf, cmd, kind, outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # The 'OUT=...; ' prefix makes this a command list, so it
                # must be grouped to act as a single pipeline element.
                cmd = '{ ' + cmd + '; }'
            pieces.append(' |\n' + cmd)
        else:
            pieces.append('\n' + cmd)
    script = ''.join(pieces)

    # Remove the temporary files at the end, and from a signal trap.
    if temps:
        rmcmd = ' '.join(['rm -f'] + [quote(temp) for temp in temps])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        script = trapcmd + '\n' + script + '\n' + rmcmd

    return script
# Reliably quote a string as a single argument for /bin/sh

# Characters that never need quoting
_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')


def quote(file):
    """Return a shell-escaped version of the file string.

    A non-empty string made only of safe characters is returned
    unchanged, and the empty string becomes ''.  Anything else is
    wrapped in single quotes, with each embedded single quote written
    as '"'"' (close quote, double-quoted quote, reopen quote).
    """
    if not _safechars.issuperset(file):
        # use single quotes, and put single quotes into double quotes
        # the string $'b is then quoted as '$'"'"'b'
        return "'" + file.replace("'", "'\"'\"'") + "'"
    if file:
        return file
    return "''"