| #!/usr/bin/env python3 |
| # |
| # Portions of this script have been (shamelessly) stolen from the |
| # prior work of Daniel Veillard (genUnicode.py) |
| # |
| # I, however, take full credit for any bugs, errors or difficulties :-) |
| # |
| # William Brack |
| # October 2003 |
| # |
| # 18 October 2003 |
| # Modified to maintain binary compatibility with previous library versions |
| # by adding a suffix 'Q' ('quick') to the macro generated for the original |
| # function, and adding generation of a function (with the original name) which |
| # instantiates the macro. |
| # |
| |
| import sys |
| import rangetab |
| |
| # |
| # A routine to take a list of yes/no (1, 0) values and turn it |
| # into a list of ranges. This will later be used to determine whether |
| # to generate single-byte lookup tables, or inline comparisons |
| # |
def makeRange(lst):
    """Collapse a list of yes/no flags into inclusive ranges of set positions.

    Args:
        lst: sequence of 1/0 values; the position of each entry is the
            value it describes.

    Returns:
        A list of (first, last) tuples in ascending order, one for every
        maximal run of consecutive 1 entries.  Both bounds are inclusive.
        An empty list is returned when no entry is set.
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        try:
            # index() raises ValueError when no further 1 is present
            start = lst.index(1, pos)   # look for start of next range
        except ValueError:
            break                       # if no more, finished
        try:
            end = lst.index(0, start)   # look for end of range
        except ValueError:
            end = size                  # if no end, set to end of list
        ret.append((start, end - 1))    # append range tuple to list
        pos = end + 1   # lst[end] is known to be 0, so resume just past it
    return ret
| |
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, no table is written for that function (the original design calls
# for a macro with inline comparisons in that case).
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 = outside any function block, 1 = inside a 'name' block
state = 0

try:
    defines = open("codegen/ranges.def", "r")
except OSError:
    # Only a failure to open the file means the definitions are unavailable;
    # any other exception is a genuine error and should surface unchanged.
    print("Missing codegen/ranges.def, aborting ...")
    sys.exit(1)
| |
| # |
| # The lines in the .def file have three types:- |
| # name: Defines a new function block |
| # ur: Defines individual or ranges of unicode values |
| # end: Indicates the end of the function block |
| # |
| # These lines are processed below. |
| # |
def _parse_value(text):
    """Convert one value token from the .def file into an integer.

    Accepted forms are hexadecimal with a '0x' prefix, a quote followed by
    a character (the code of the character after the quote is used), or a
    plain decimal number.  Malformed tokens raise ValueError or IndexError.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0] == "'":
        return ord(text[1])
    return int(text)


# 'with' closes the definitions file once parsing finishes or fails
with defines:
    for line in defines:
        # ignore blank lines, or lines beginning with '#'
        line = line.strip()
        if line == '' or line[0] == '#':
            continue
        # split line into whitespace-separated fields, then split on type
        try:
            # split on any run of whitespace so that repeated blanks or
            # tabs do not produce empty fields
            fields = line.split()
            #
            # name line:
            #   validate any previous function block already ended
            #   validate this function not already defined
            #   initialize an entry in the function dictionary
            #       including a mask table with no values yet defined
            #
            if fields[0] == 'name':
                new_name = fields[1]
                if state != 0:
                    # the line is skipped, so the active block's name must
                    # stay bound for the 'ur' lines which follow
                    print("'name' %s found before previous name "
                          "completed" % (new_name))
                    continue
                state = 1
                name = new_name
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements (chdata, rangedata)
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif fields[0] == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0

            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #       individual unicode values or ranges of values
            #
            elif fields[0] == 'ur':
                if state != 1:
                    raise Exception("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    # pos <= 0 means not a range, so must be individual value
                    if pos <= 0:
                        value = _parse_value(el)
                        if value < 0 or value > 0x1fffff:
                            raise Exception('Illegal value (%s) in ch for'
                                            ' name %s' % (el, name))
                        # for ur we have only ranges (makes things simpler),
                        # so convert val to range
                        currange = (value, value)
                    # pos > 0 means this is a range, so isolate/validate
                    # the interval
                    else:
                        # split the range into its first-val, last-val
                        (first, last) = el.split("..")
                        # convert values from text into binary
                        start = _parse_value(first)
                        end = _parse_value(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise Exception("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now take care
                    # of it.  We split on single-byte values vs. multibyte
                    if currange[1] < 0x100:     # single-byte
                        chdata = Functs[name][0]
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if chdata[ch]:
                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                raise Exception(msg)
                            chdata[ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise Exception("range already defined in"
                                            " function")
                        Functs[name][1].append(currange)

        except Exception:
            # report which input line was at fault, then let the error
            # propagate so the build stops
            print("Failed to process line: %s" % (line))
            raise
| |
try:
    output = open("codegen/ranges.inc", "w")
except OSError:
    # Only a failure to create the file is reported here; any other
    # exception is a genuine error and should surface unchanged.
    print("Failed to open codegen/ranges.inc")
    sys.exit(1)

#
# Now output the generated data.
#

fkeys = sorted(Functs.keys())

# 'with' closes the output file even if generation fails part-way
with output:
    for f in fkeys:
        chdata = Functs[f][0]

        # First we convert the specified single-byte values into a group of
        # ranges.
        if max(chdata) > 0:     # only check if at least one entry
            rangeTable = makeRange(chdata)
            if len(rangeTable) >= minTableSize:     # table is worthwhile
                # write the constant data to the code file
                output.write("const unsigned char %s_tab[256] = {\n" % f)
                pline = " "
                for n in range(255):
                    pline += " 0x%02x," % chdata[n]
                    # flush the row once it has grown past 72 columns
                    if len(pline) > 72:
                        output.write(pline + "\n")
                        pline = " "
                # the final entry carries the closing brace, not a comma
                output.write(pline + " 0x%02x };\n\n" % chdata[255])

    #
    # Next we do the unicode ranges
    #

    for f in fkeys:
        if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
            rangeTable = Functs[f][1]
            rangeTable.sort()       # ascending tuple sequence
            # gen_range_tables comes from the project's rangetab module; its
            # return value is inserted below as the group initializer text
            group = rangetab.gen_range_tables(output, f, '_srng', '_lrng',
                                              rangeTable)

            output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" %
                         (f, group))
| |