codegen/genRanges.py - third_party/github.com/GNOME/libxml2 - Git at Google

 #!/usr/bin/env python3
 #
 # Portions of this script have been (shamelessly) stolen from the
 # prior work of Daniel Veillard (genUnicode.py)
 #
 # I, however, take full credit for any bugs, errors or difficulties :-)
 #
 # William Brack
 # October 2003
 #
 # 18 October 2003
 # Modified to maintain binary compatibility with previous library versions
 # by adding a suffix 'Q' ('quick') to the macro generated for the original
 # function, and adding generation of a function (with the original name) which
 # instantiates the macro.
 #

 import sys
 import rangetab

 #
 # A routine to take a list of yes/no (1, 0) values and turn it
 # into a list of ranges.  This will later be used to determine whether
 # to generate single-byte lookup tables, or inline comparisons
 #
 def makeRange(lst):
     """Collapse a list of 0/1 flags into inclusive (first, last) ranges.

     lst is a list whose entries are 1 (present) or 0 (absent).  The
     result is a list of (first, last) index tuples, in ascending order,
     one for each maximal run of consecutive 1 entries.  An empty or
     all-zero list gives an empty result.
     """
     ret = []
     size = len(lst)
     pos = 0
     while pos < size:
         # list.index raises ValueError when the value is absent; searching
         # from 'pos' avoids copying the tail of the list on every pass
         try:
             start = lst.index(1, pos)       # start of the next range
         except ValueError:
             break                           # if no more, finished
         try:
             end = lst.index(0, start)       # first entry past the range
         except ValueError:
             end = size                      # if no end, set to end of list
         ret.append((start, end - 1))        # append range tuple to list
         # lst[end] is a 0 (or lies past the end), so it can be skipped
         pos = end + 1
     return ret

 # minTableSize gives the minimum number of ranges which must be present
 # before a 256-byte lookup table is produced.  If there are fewer than this
 # number, no table is written by this script (inline comparisons are
 # presumably used for such functions instead)
 minTableSize = 6

 # dictionary of functions, key=name; each element is a two-item list:
 #   [0] a 256-entry list of 0/1 flags for the single-byte values
 #   [1] a list of (start, end) tuples for ranges reaching beyond 0xff
 Functs = {}

 # parser state: 0 = outside any function block, 1 = inside a 'name' block
 state = 0

 try:
     defines = open("codegen/ranges.def", "r")
 except OSError:
     # only an I/O failure means the file is unavailable; a bare 'except'
     # would also trap KeyboardInterrupt and SystemExit
     print("Missing codegen/ranges.def, aborting ...")
     sys.exit(1)

 def _parseValue(text):
     """Convert one value token from a 'ur' line into an integer.

     A token starting with '0x' is read as hexadecimal, a token starting
     with a single quote yields the code of the character following the
     quote, and anything else is read as a decimal integer.
     """
     if text[0:2] == '0x':
         return int(text[2:], 16)
     if text[0] == "'":
         return ord(text[1])
     return int(text)


 def _parseRange(el, name):
     """Turn one field of a 'ur' line into an inclusive (start, end) tuple.

     el is either an individual value or 'first..last'; name is the
     current function name and is used only in the error message.
     Raises Exception when the value or interval lies outside
     0..0x1fffff or when the interval is reversed.
     """
     # no '..' (or one at index 0) means not a range, so the field must
     # be an individual value
     if el.find('..') <= 0:
         value = _parseValue(el)
         if value < 0 or value > 0x1fffff:
             raise Exception('Illegal value (%s) in ch for'
                     ' name %s' % (el, name))
         # for ur we have only ranges (makes things simpler),
         # so convert the value to a one-element range
         return (value, value)
     # split the range into its first-val, last-val and convert both
     (first, last) = el.split("..")
     start = _parseValue(first)
     end = _parseValue(last)
     if start < 0 or end > 0x1fffff or start > end:
         raise Exception("Invalid range '%s'" % el)
     return (start, end)


 #
 # The lines in the .def file have three types:-
 #   name:   Defines a new function block
 #   ur:     Defines individual or ranges of unicode values
 #   end:    Indicates the end of the function block
 #
 # These lines are processed below.  The 'with' closes the input file
 # once the loop finishes, whether normally or through an exception.
 #
 with defines:
     for line in defines:
         # ignore blank lines, or lines beginning with '#'
         if line[0] == '#':
             continue
         line = line.strip()
         if line == '':
             continue
         try:
             # split line into whitespace-separated fields, then split on
             # type; split() tolerates runs of spaces or tabs, which
             # split(' ') turned into empty fields
             fields = line.split()
             kind = fields[0]
             #
             # name line:
             #   validate any previous function block already ended
             #   validate this function not already defined
             #   initialize an entry in the function dictionary
             #       including a mask table with no values yet defined
             #
             if kind == 'name':
                 if state != 0:
                     # the line is skipped, so 'name' must keep referring
                     # to the block that is still open
                     print("'name' %s found before previous name "
                           "completed" % (fields[1]))
                     continue
                 name = fields[1]
                 state = 1
                 if name in Functs:
                     print("name '%s' already present - may give"
                           " wrong results" % (name))
                 else:
                     # dict entry with two list elements (chdata, rangedata)
                     Functs[name] = [[0] * 256, []]
             #
             # end line:
             #   validate there was a preceding function name line
             #   set state to show no current function active
             #
             elif kind == 'end':
                 if state == 0:
                     print("'end' found outside of function block")
                     continue
                 state = 0
             #
             # ur line:
             #   validate function has been defined
             #   process remaining fields on the line, which may be either
             #       individual unicode values or ranges of values
             #
             elif kind == 'ur':
                 if state != 1:
                     raise Exception("'ur' found outside of 'name' block")
                 for el in fields[1:]:
                     currange = _parseRange(el, name)
                     # We split on single-byte values vs. multibyte
                     if currange[1] < 0x100:     # single-byte
                         for ch in range(currange[0], currange[1] + 1):
                             # validate that value not previously defined
                             if Functs[name][0][ch]:
                                 msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                 raise Exception(msg)
                             Functs[name][0][ch] = 1
                     else:                       # multi-byte
                         if currange in Functs[name][1]:
                             raise Exception("range already defined in"
                                     " function")
                         Functs[name][1].append(currange)
         except Exception:
             # report the offending line, then let the error propagate
             print("Failed to process line: %s" % (line))
             raise

 # Open the generated include file for writing; give up with a message
 # and a non-zero exit status if it cannot be created.
 try:
     output = open("codegen/ranges.inc", "w")
 except OSError:
     # only I/O failures are reported here; a bare 'except' would also
     # trap KeyboardInterrupt and SystemExit
     print("Failed to open codegen/ranges.inc")
     sys.exit(1)

 #
 # Now output the generated data.
 #

 fkeys = sorted(Functs)

 # First we convert the specified single-byte values into a group of
 # ranges, and write a 256-entry table for each function that has enough
 # separate ranges to make a table worthwhile.
 for f in fkeys:
     byteMap = Functs[f][0]
     if max(byteMap) <= 0:           # no single-byte entry at all
         continue
     rangeTable = makeRange(byteMap)
     numRanges = len(rangeTable)
     if numRanges < minTableSize:    # too few ranges for a table
         continue
     # write the constant data to the code file
     output.write("const unsigned char %s_tab[256] = {\n" % f)
     pline = "   "
     # the first 255 entries are each followed by a comma; a row is
     # flushed as soon as it grows past 72 characters
     for flag in byteMap[:255]:
         pline += " 0x%02x," % flag
         if len(pline) > 72:
             output.write(pline + "\n")
             pline = "   "
     # the last entry closes the initializer
     output.write(pline + " 0x%02x };\n\n" % byteMap[255])

 #
 # Next we do the unicode ranges
 #

 for f in fkeys:
     wideRanges = Functs[f][1]
     if not wideRanges:          # only generate if unicode ranges present
         continue
     wideRanges.sort()           # ascending tuple sequence, sorted in place
     # gen_range_tables is given the output file; its return value is
     # written below as the initializer of the group (presumably it also
     # emits the '_srng'/'_lrng' tables itself - see rangetab)
     group = rangetab.gen_range_tables(output, f, '_srng', '_lrng',
                                       wideRanges)
     output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" % (f, group))

 output.close()
	#!/usr/bin/env python3
	#
	# Portions of this script have been (shamelessly) stolen from the
	# prior work of Daniel Veillard (genUnicode.py)
	#
	# I, however, take full credit for any bugs, errors or difficulties :-)
	#
	# William Brack
	# October 2003
	#
	# 18 October 2003
	# Modified to maintain binary compatibility with previous library versions
	# by adding a suffix 'Q' ('quick') to the macro generated for the original
	# function, and adding generation of a function (with the original name) which
	# instantiates the macro.
	#

	import sys
	import rangetab

	#
	# A routine to take a list of yes/no (1, 0) values and turn it
	# into a list of ranges. This will later be used to determine whether
	# to generate single-byte lookup tables, or inline comparisons
	#
	def makeRange(lst):
	    """Collapse a list of 0/1 flags into inclusive (first, last) ranges.

	    lst is a list whose entries are 1 (present) or 0 (absent).  The
	    result is a list of (first, last) index tuples, in ascending order,
	    one for each maximal run of consecutive 1 entries.  An empty or
	    all-zero list gives an empty result.
	    """
	    ret = []
	    size = len(lst)
	    pos = 0
	    while pos < size:
	        # list.index raises ValueError when the value is absent; searching
	        # from 'pos' avoids copying the tail of the list on every pass
	        try:
	            start = lst.index(1, pos)       # start of the next range
	        except ValueError:
	            break                           # if no more, finished
	        try:
	            end = lst.index(0, start)       # first entry past the range
	        except ValueError:
	            end = size                      # if no end, set to end of list
	        ret.append((start, end - 1))        # append range tuple to list
	        # lst[end] is a 0 (or lies past the end), so it can be skipped
	        pos = end + 1
	    return ret

	# minTableSize gives the minimum number of ranges which must be present
	# before a 256-byte lookup table is produced.  If there are fewer than this
	# number, no table is written by this script (inline comparisons are
	# presumably used for such functions instead)
	minTableSize = 6

	# dictionary of functions, key=name; each element is a two-item list:
	#   [0] a 256-entry list of 0/1 flags for the single-byte values
	#   [1] a list of (start, end) tuples for ranges reaching beyond 0xff
	Functs = {}

	# parser state: 0 = outside any function block, 1 = inside a 'name' block
	state = 0

	try:
	    defines = open("codegen/ranges.def", "r")
	except OSError:
	    # only an I/O failure means the file is unavailable; a bare 'except'
	    # would also trap KeyboardInterrupt and SystemExit
	    print("Missing codegen/ranges.def, aborting ...")
	    sys.exit(1)

	def _parseValue(text):
	    """Convert one value token from a 'ur' line into an integer.

	    A token starting with '0x' is read as hexadecimal, a token starting
	    with a single quote yields the code of the character following the
	    quote, and anything else is read as a decimal integer.
	    """
	    if text[0:2] == '0x':
	        return int(text[2:], 16)
	    if text[0] == "'":
	        return ord(text[1])
	    return int(text)


	def _parseRange(el, name):
	    """Turn one field of a 'ur' line into an inclusive (start, end) tuple.

	    el is either an individual value or 'first..last'; name is the
	    current function name and is used only in the error message.
	    Raises Exception when the value or interval lies outside
	    0..0x1fffff or when the interval is reversed.
	    """
	    # no '..' (or one at index 0) means not a range, so the field must
	    # be an individual value
	    if el.find('..') <= 0:
	        value = _parseValue(el)
	        if value < 0 or value > 0x1fffff:
	            raise Exception('Illegal value (%s) in ch for'
	                    ' name %s' % (el, name))
	        # for ur we have only ranges (makes things simpler),
	        # so convert the value to a one-element range
	        return (value, value)
	    # split the range into its first-val, last-val and convert both
	    (first, last) = el.split("..")
	    start = _parseValue(first)
	    end = _parseValue(last)
	    if start < 0 or end > 0x1fffff or start > end:
	        raise Exception("Invalid range '%s'" % el)
	    return (start, end)


	#
	# The lines in the .def file have three types:-
	#   name:   Defines a new function block
	#   ur:     Defines individual or ranges of unicode values
	#   end:    Indicates the end of the function block
	#
	# These lines are processed below.  The 'with' closes the input file
	# once the loop finishes, whether normally or through an exception.
	#
	with defines:
	    for line in defines:
	        # ignore blank lines, or lines beginning with '#'
	        if line[0] == '#':
	            continue
	        line = line.strip()
	        if line == '':
	            continue
	        try:
	            # split line into whitespace-separated fields, then split on
	            # type; split() tolerates runs of spaces or tabs, which
	            # split(' ') turned into empty fields
	            fields = line.split()
	            kind = fields[0]
	            #
	            # name line:
	            #   validate any previous function block already ended
	            #   validate this function not already defined
	            #   initialize an entry in the function dictionary
	            #       including a mask table with no values yet defined
	            #
	            if kind == 'name':
	                if state != 0:
	                    # the line is skipped, so 'name' must keep referring
	                    # to the block that is still open
	                    print("'name' %s found before previous name "
	                          "completed" % (fields[1]))
	                    continue
	                name = fields[1]
	                state = 1
	                if name in Functs:
	                    print("name '%s' already present - may give"
	                          " wrong results" % (name))
	                else:
	                    # dict entry with two list elements (chdata, rangedata)
	                    Functs[name] = [[0] * 256, []]
	            #
	            # end line:
	            #   validate there was a preceding function name line
	            #   set state to show no current function active
	            #
	            elif kind == 'end':
	                if state == 0:
	                    print("'end' found outside of function block")
	                    continue
	                state = 0
	            #
	            # ur line:
	            #   validate function has been defined
	            #   process remaining fields on the line, which may be either
	            #       individual unicode values or ranges of values
	            #
	            elif kind == 'ur':
	                if state != 1:
	                    raise Exception("'ur' found outside of 'name' block")
	                for el in fields[1:]:
	                    currange = _parseRange(el, name)
	                    # We split on single-byte values vs. multibyte
	                    if currange[1] < 0x100:     # single-byte
	                        for ch in range(currange[0], currange[1] + 1):
	                            # validate that value not previously defined
	                            if Functs[name][0][ch]:
	                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
	                                raise Exception(msg)
	                            Functs[name][0][ch] = 1
	                    else:                       # multi-byte
	                        if currange in Functs[name][1]:
	                            raise Exception("range already defined in"
	                                    " function")
	                        Functs[name][1].append(currange)
	        except Exception:
	            # report the offending line, then let the error propagate
	            print("Failed to process line: %s" % (line))
	            raise

	# Open the generated include file for writing; give up with a message
	# and a non-zero exit status if it cannot be created.
	try:
	    output = open("codegen/ranges.inc", "w")
	except OSError:
	    # only I/O failures are reported here; a bare 'except' would also
	    # trap KeyboardInterrupt and SystemExit
	    print("Failed to open codegen/ranges.inc")
	    sys.exit(1)

	#
	# Now output the generated data.
	#

	fkeys = sorted(Functs.keys())

	# First we convert the specified single-byte values into a group of
	# ranges, and write a 256-entry table for each function that has enough
	# separate ranges to make a table worthwhile.
	for f in fkeys:
	    if max(Functs[f][0]) > 0:   # only check if at least one entry
	        rangeTable = makeRange(Functs[f][0])
	        numRanges = len(rangeTable)
	        if numRanges >= minTableSize:   # table is worthwhile
	            # write the constant data to the code file
	            output.write("const unsigned char %s_tab[256] = {\n" % f)
	            # each row starts with three spaces; the first 255 entries
	            # are each followed by a comma, and a row is flushed as
	            # soon as it grows past 72 characters
	            pline = "   "
	            for n in range(255):
	                pline += " 0x%02x," % Functs[f][0][n]
	                if len(pline) > 72:
	                    output.write(pline + "\n")
	                    pline = "   "
	            # the last entry closes the initializer
	            output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])

	#
	# Next we do the unicode ranges
	#

	for f in fkeys:
	    if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
	        rangeTable = Functs[f][1]
	        rangeTable.sort()       # ascending tuple sequence
	        # gen_range_tables is given the output file; its return value is
	        # written below as the initializer of the group (presumably it
	        # also emits the '_srng'/'_lrng' tables itself - see rangetab)
	        group = rangetab.gen_range_tables(output, f, '_srng', '_lrng',
	                                          rangeTable)

	        output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" %
	                     (f, group))

	output.close()