#! /usr/bin/env python | |
# 1) Regular Expressions Test | |
# | |
# Read a file of (extended per egrep) regular expressions (one per line), | |
# and apply those to all files whose names are listed on the command line. | |
# Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns | |
# against a five /etc/termcap files. Tests using more elaborate patters | |
# would also be interesting. Your code should not break if given hundreds | |
# of regular expressions or binary files to scan. | |
# This implementation: | |
# - combines all patterns into a single one using ( ... | ... | ... ) | |
# - reads patterns from stdin, scans files given as command line arguments | |
# - produces output in the format <file>:<lineno>:<line> | |
# - is only about 2.5 times as slow as egrep (though I couldn't run | |
# Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) | |
import string | |
import sys | |
import re | |
def main(): | |
pats = map(chomp, sys.stdin.readlines()) | |
bigpat = '(' + '|'.join(pats) + ')' | |
prog = re.compile(bigpat) | |
for file in sys.argv[1:]: | |
try: | |
fp = open(file, 'r') | |
except IOError, msg: | |
print "%s: %s" % (file, msg) | |
continue | |
lineno = 0 | |
while 1: | |
line = fp.readline() | |
if not line: | |
break | |
lineno = lineno + 1 | |
if prog.search(line): | |
print "%s:%s:%s" % (file, lineno, line), | |
def chomp(s): | |
return s.rstrip('\n') | |
if __name__ == '__main__': | |
main() |