doc/apibuild.py - third_party/github.com/GNOME/libxml2 - Git at Google

 #!/usr/bin/python -u
 #
 # This is the API builder; it parses the C sources and builds the
 # formal API description in XML.
 #
 # See Copyright for the status of this software.
 #
 # daniel@veillard.com
 #
 import os, sys
 import string
 import glob

 # Global tracing switches.
 # debug: when non-zero, the preprocessor handling prints each directive it
 # sees.
 debug=0
 # debugsym: name of one symbol to trace (definition, update and registration
 # are printed for it); None disables the tracing.
 #debugsym='ignorableWhitespaceSAXFunc'
 debugsym=None

 #
 # C parser analysis code
 #
 # Table of file names to leave out, each mapped to a short human-readable
 # reason. The code consulting this table is not in this part of the file;
 # presumably keys are matched against source file names -- TODO confirm how
 # entries such as "trio" and "/save.h" are matched.
 ignored_files = {
   "trio": "too many non standard macros",
   "trio.c": "too many non standard macros",
   "trionan.c": "too many non standard macros",
   "triostr.c": "too many non standard macros",
   "acconfig.h": "generated portability layer",
   "config.h": "generated portability layer",
   "libxml.h": "internal only",
   "testOOM.c": "out of memory tester",
   "testOOMlib.h": "out of memory tester",
   "testOOMlib.c": "out of memory tester",
   "rngparser.c": "not yet integrated",
   "rngparser.h": "not yet integrated",
   "elfgcchack.h": "not a normal header",
   "testHTML.c": "test tool",
   "testReader.c": "test tool",
   "testSchemas.c": "test tool",
   "testXPath.c": "test tool",
   "testAutomata.c": "test tool",
   "testModule.c": "test tool",
   "testRegexp.c": "test tool",
   "testThreads.c": "test tool",
   "testC14N.c": "test tool",
   "testRelax.c": "test tool",
   "testSAX.c": "test tool",
   "testURI.c": "test tool",
   "testapi.c": "generated regression tests",
   "runtest.c": "regression tests program",
   "runsuite.c": "regression tests program",
   "tst.c": "not part of the library",
   "test.c": "not part of the library",
   "testdso.c": "test for dynamid shared libraries",
   "testrecurse.c": "test for entities recursions",
   "xzlib.h": "Internal API only 2.8.0",
   "buf.h": "Internal API only 2.9.0",
   "enc.h": "Internal API only 2.9.0",
   "/save.h": "Internal API only 2.9.0",
   "timsort.h": "Internal header only for xpath.c 2.9.0",
 }

 # Table of C identifiers (mostly portability/attribute macros) to ignore,
 # each mapped to a (count, reason) pair. The consumer of this table is not
 # in this part of the file; presumably `count` is the number of tokens
 # following the word that are dropped together with it (e.g. the
 # parenthesised argument of __declspec) -- TODO confirm against the parser.
 ignored_words = {
   "WINAPI": (0, "Windows keyword"),
   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   "XMLCALL": (0, "Special macro for win32 calls"),
   "XSLTCALL": (0, "Special macro for win32 calls"),
   "XMLCDECL": (0, "Special macro for win32 calls"),
   "EXSLTCALL": (0, "Special macro for win32 calls"),
   "__declspec": (3, "Windows keyword"),
   "__stdcall": (0, "Windows keyword"),
   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
   "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
   "X_IN_Y": (5, "macro function builder"),
   "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
   "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
   "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
 }

 def escape(raw):
     """Return `raw` with the five XML special characters replaced by entities.

     The ampersand is handled first so that the '&' introduced by the other
     replacements is not escaped a second time.
     """
     substitutions = (
         ('&', '&amp;'),
         ('<', '&lt;'),
         ('>', '&gt;'),
         ("'", '&apos;'),
         ('"', '&quot;'),
     )
     for char, entity in substitutions:
         raw = raw.replace(char, entity)
     return raw

 def uniq(items):
     """Return the distinct elements of `items` as a list.

     Elements are used as dictionary keys, so they must be hashable.
     """
     return list(dict.fromkeys(items, 1))

 class identifier:
     """Record describing one symbol collected from the C sources.

     Attributes:
         name: the symbol name.
         header: the header the symbol was declared in, or None.
         module: the module (source file) the symbol belongs to, or None.
         type: kind of symbol as a string (e.g. "function", "macro"), or None.
         lineno: line number associated with the symbol.
         info, extra: opaque payloads supplied by the parser.
         static: 1 when the symbol is file-local, 0 otherwise.
         conditionals: private copy of the list of preprocessor conditions
             guarding the symbol, or None when there are none.
     """
     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                  info=None, extra=None, conditionals = None):
         self.name = name
         self.header = header
         self.module = module
         self.type = type
         self.info = info
         self.extra = extra
         self.lineno = lineno
         self.static = 0
         # Normalises empty/None to None and copies the list otherwise.
         self.set_conditionals(conditionals)
         if self.name == debugsym:
             print("=> define %s : %s" % (debugsym, (module, type, info,
                                          extra, conditionals)))

     def __repr__(self):
         r = "%s %s:" % (self.type, self.name)
         if self.static:
             r = r + " static"
         if self.module is not None:
             r = r + " from %s" % (self.module)
         if self.info is not None:
             r = r + " " +  repr(self.info)
         if self.extra is not None:
             r = r + " " + repr(self.extra)
         if self.conditionals is not None:
             r = r + " " + repr(self.conditionals)
         return r


     def set_header(self, header):
         self.header = header
     def set_module(self, module):
         self.module = module
     def set_type(self, type):
         self.type = type
     def set_info(self, info):
         self.info = info
     def set_extra(self, extra):
         self.extra = extra
     def set_lineno(self, lineno):
         self.lineno = lineno
     def set_static(self, static):
         self.static = static
     def set_conditionals(self, conditionals):
         """Store a private copy of `conditionals`; empty or None becomes None."""
         if conditionals is None or len(conditionals) == 0:
             self.conditionals = None
         else:
             self.conditionals = conditionals[:]

     def get_name(self):
         return self.name
     def get_header(self):
         # Fixed: this used to return self.module.
         return self.header
     def get_module(self):
         return self.module
     def get_type(self):
         return self.type
     def get_info(self):
         return self.info
     def get_lineno(self):
         return self.lineno
     def get_extra(self):
         return self.extra
     def get_static(self):
         return self.static
     def get_conditionals(self):
         return self.conditionals

     def update(self, header, module, type = None, info = None, extra=None,
                conditionals=None):
         """Merge newly discovered information into this record.

         header and type only fill in values that are still missing; module
         is set when missing or when it currently equals the header; info,
         extra and conditionals overwrite the stored value whenever they are
         not None.
         """
         if self.name == debugsym:
             print("=> update %s : %s" % (debugsym, (module, type, info,
                                          extra, conditionals)))
         if header is not None and self.header is None:
             # Fixed: this used to store `module` as the header.
             self.set_header(header)
         if module is not None and (self.module is None or self.header == self.module):
             self.set_module(module)
         if type is not None and self.type is None:
             self.set_type(type)
         if info is not None:
             self.set_info(info)
         if extra is not None:
             self.set_extra(extra)
         if conditionals is not None:
             self.set_conditionals(conditionals)

 class index:
     """Symbol table: one dictionary per kind of symbol, keyed by name.

     `identifiers` holds every recorded symbol; `functions`, `variables`,
     `includes`, `structs`, `enums`, `typedefs` and `macros` hold the symbols
     of the matching type; `references` holds symbols recorded via add_ref();
     `info` is a free-form dictionary filled in by the parser.
     """
     def __init__(self, name = "noname"):
         self.name = name
         self.identifiers = {}
         self.functions = {}
         self.variables = {}
         self.includes = {}
         self.structs = {}
         self.enums = {}
         self.typedefs = {}
         self.macros = {}
         self.references = {}
         self.info = {}

     def _record(self, name, header, module, static, type, lineno, info,
                 extra, conditionals):
         """Create a fresh identifier for `name` and store it in `identifiers`.

         NOTE(review): the previous code first tried
         `d.update(header, module, type, lineno, info, extra, conditionals)`
         on an existing record, but identifier.update() accepts one argument
         less, so the call always raised TypeError, which a bare `except:`
         swallowed before building a new record. The effective behaviour --
         always replace the record -- is kept here on purpose; switching to
         a real update would change the generated API description.
         """
         d = identifier(name, header, module, type, lineno, info, extra, conditionals)
         self.identifiers[name] = d
         if static == 1:
             d.set_static(1)
         return d

     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
         """Record a reference to `name`; returns the identifier, or None for
         names starting with a double underscore (those are skipped)."""
         if name[0:2] == '__':
             return None
         d = self._record(name, header, module, static, type, lineno, info,
                          extra, conditionals)

         if name is not None and type is not None:
             self.references[name] = d

         if name == debugsym:
             print("New ref: %s" % (d))

         return d

     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
         """Record a definition of `name` and file it under its type.

         Returns the identifier, or None for names starting with a double
         underscore (those are skipped). An unknown `type` is reported on
         stdout; the identifier is still stored in `identifiers`.
         """
         if name[0:2] == '__':
             return None
         d = self._record(name, header, module, static, type, lineno, info,
                          extra, conditionals)

         if name is not None and type is not None:
             # "functype" shares the functions table with "function".
             tables = {
                 "function": self.functions,
                 "functype": self.functions,
                 "variable": self.variables,
                 "include": self.includes,
                 "struct": self.structs,
                 "enum": self.enums,
                 "typedef": self.typedefs,
                 "macro": self.macros,
             }
             if type in tables:
                 tables[type][name] = d
             else:
                 print("Unable to register type ", type)

         if name == debugsym:
             print("New symbol: %s" % (d))

         return d

     def _merge_table(self, kind, mine, theirs, overrides_macros=False,
                      shadowed_by=()):
         """Copy the entries of `theirs` that are missing from `mine`.

         kind: singular label used in the "redeclared" message.
         overrides_macros: when true, a macro of the same name is dropped
             from self.macros before the entry is considered.
         shadowed_by: tables whose entries take precedence; names found in
             any of them are skipped silently.
         """
         for name in list(theirs.keys()):
             if overrides_macros and name in self.macros:
                 del self.macros[name]
             if any(name in table for table in shadowed_by):
                 continue
             if name in mine:
                 print("%s %s from %s redeclared in %s" % (
                    kind, name, mine[name].header, theirs[name].header))
             else:
                 mine[name] = theirs[name]
                 self.identifiers[name] = theirs[name]

     def merge(self, idx):
         """Merge the symbols of another index into this one.

         Symbols already present are kept and a "redeclared" message is
         printed. Macros may be used to override functions or variables
         definitions: a function or variable removes a macro of the same
         name, and a macro is ignored when a variable, function or enum of
         that name is already known.
         """
         self._merge_table("function", self.functions, idx.functions,
                           overrides_macros=True)
         self._merge_table("variable", self.variables, idx.variables,
                           overrides_macros=True)
         self._merge_table("struct", self.structs, idx.structs)
         self._merge_table("typedef", self.typedefs, idx.typedefs)
         # Macros are merged before enums, so only enums already present in
         # this index shadow them.
         self._merge_table("macro", self.macros, idx.macros,
                           shadowed_by=(self.variables, self.functions,
                                        self.enums))
         self._merge_table("enum", self.enums, idx.enums)

     def merge_public(self, idx):
         """Enrich functions already known here with data from `idx`.

         Only functions present in both indexes are touched; a mismatch
         between the two sets of conditionals is reported on stdout.
         """
         for name in list(idx.functions.keys()):
             if name in self.functions:
                 # check that function condition agrees with header
                 if idx.functions[name].conditionals != \
                    self.functions[name].conditionals:
                     print("Header condition differs from Function for %s:" \
                        % name)
                     print("  H: %s" % self.functions[name].conditionals)
                     print("  C: %s" % idx.functions[name].conditionals)
                 up = idx.functions[name]
                 self.functions[name].update(None, up.module, up.type, up.info, up.extra)
         # TODO: do the same for variables.

     def analyze_dict(self, type, dict):
         """Print how many entries of `dict` exist and how many are public
         (static == 0); `type` is the label used in the message."""
         count = len(dict)
         public = sum(1 for sym in dict.values() if sym.static == 0)
         if count != public:
             print("  %d %s , %d public" % (count, type, public))
         elif count != 0:
             print("  %d public %s" % (count, type))


     def analyze(self):
         """Print the public/total statistics for the main symbol tables."""
         self.analyze_dict("functions", self.functions)
         self.analyze_dict("variables", self.variables)
         self.analyze_dict("structs", self.structs)
         self.analyze_dict("typedefs", self.typedefs)
         self.analyze_dict("macros", self.macros)

 class CLexer:
     """A lexer for the C language, tokenize the input by reading and
        analyzing it line by line.

        Tokens are (kind, text) tuples where kind is one of 'preproc',
        'string', 'comment', 'name', 'sep' or 'op'."""

     # Characters that terminate a 'name' token.
     _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"

     def __init__(self, input):
         self.input = input      # object providing readline()
         self.tokens = []        # queue of tokens already produced
         self.line = ""          # unparsed remainder of the current line
         self.lineno = 0         # number of physical lines read so far
         # Fixed: `last` used to be created only by token(), so debug()
         # raised AttributeError when called before the first token.
         self.last = None

     @staticmethod
     def _is_ascii_alnum(ch):
         """True for the ASCII letters and digits only."""
         return ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9')

     def getline(self):
         """Return the next non-blank logical line, stripped of surrounding
         whitespace, with backslash continuations joined; None at EOF."""
         line = ''
         while line == '':
             line = self.input.readline()
             if not line:
                 return None
             self.lineno = self.lineno + 1
             line = line.lstrip()
             line = line.rstrip()
             if line == '':
                 continue
             while line[-1] == '\\':
                 line = line[:-1]
                 n = self.input.readline()
                 self.lineno = self.lineno + 1
                 n = n.lstrip()
                 n = n.rstrip()
                 if not n:
                     break
                 else:
                     line = line + n
         return line

     def getlineno(self):
         return self.lineno

     def push(self, token):
         """Put `token` back at the front of the queue."""
         self.tokens.insert(0, token)

     def debug(self):
         print("Last token: ", self.last)
         print("Token queue: ", self.tokens)
         print("Line %d end: " % (self.lineno), self.line)

     def token(self):
         """Return the next token, or None when the input is exhausted (or
         ends inside a string or a comment)."""
         while self.tokens == []:
             if self.line == "":
                 line = self.getline()
             else:
                 line = self.line
                 self.line = ""
             if line is None:
                 return None

             # A preprocessor line is split on whitespace, one token per word.
             if line[0] == '#':
                 self.tokens = [('preproc', word) for word in line.split()]
                 break
             l = len(line)
             if line[0] == '"' or line[0] == "'":
                 end = line[0]
                 line = line[1:]
                 found = 0
                 tok = ""
                 while found == 0:
                     i = 0
                     l = len(line)
                     while i < l:
                         if line[i] == end:
                             self.line = line[i+1:]
                             line = line[:i]
                             l = i
                             found = 1
                             break
                         if line[i] == '\\':
                             # skip the escaped character
                             i = i + 1
                         i = i + 1
                     tok = tok + line
                     if found == 0:
                         line = self.getline()
                         if line is None:
                             return None
                 self.last = ('string', tok)
                 return self.last

             if l >= 2 and line[0] == '/' and line[1] == '*':
                 line = line[2:]
                 found = 0
                 tok = ""
                 while found == 0:
                     i = 0
                     l = len(line)
                     while i < l:
                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
                             self.line = line[i+2:]
                             # NOTE(review): [:i-1] also drops the character
                             # before the closing "*/"; when i == 0 the slice
                             # is [:-1], so a line holding only "*/"
                             # contributes "*". Kept as is: the comment
                             # parsers are written against this exact text.
                             line = line[:i-1]
                             l = i
                             found = 1
                             break
                         i = i + 1
                     if tok != "":
                         tok = tok + "\n"
                     tok = tok + line
                     if found == 0:
                         line = self.getline()
                         if line is None:
                             return None
                 self.last = ('comment', tok)
                 return self.last
             if l >= 2 and line[0] == '/' and line[1] == '/':
                 line = line[2:]
                 self.last = ('comment', line)
                 return self.last
             # Cut the line at the first comment or string start; the rest is
             # kept in self.line for the next round.
             i = 0
             while i < l:
                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
                     self.line = line[i:]
                     line = line[:i]
                     break
                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
                     self.line = line[i:]
                     line = line[:i]
                     break
                 if line[i] == '"' or line[i] == "'":
                     self.line = line[i:]
                     line = line[:i]
                     break
                 i = i + 1
             l = len(line)
             i = 0
             while i < l:
                 if line[i] == ' ' or line[i] == '\t':
                     i = i + 1
                     continue
                 if self._is_ascii_alnum(line[i]):
                     s = i
                     # No letter or digit is part of _NAME_STOP, so testing
                     # against it alone is enough to find the end of the name.
                     while i < l and line[i] not in self._NAME_STOP:
                         i = i + 1
                     self.tokens.append(('name', line[s:i]))
                     continue
                 if line[i] in "(){}:;,[]":
                     self.tokens.append(('sep', line[i]))
                     i = i + 1
                     continue
                 if line[i] in "+-*><=/%&!|.":
                     if line[i] == '.' and  i + 2 < l and \
                        line[i+1] == '.' and line[i+2] == '.':
                         self.tokens.append(('name', '...'))
                         i = i + 3
                         continue

                     j = i + 1
                     if j < l and line[j] in "+-*><=/%&!|":
                         # two-character operator
                         self.tokens.append(('op', line[i:j+1]))
                         i = j + 1
                     else:
                         self.tokens.append(('op', line[i]))
                         i = i + 1
                     continue
                 # Anything else (e.g. a leading underscore) starts a name too.
                 s = i
                 while i < l and line[i] not in self._NAME_STOP:
                     i = i + 1
                 self.tokens.append(('name', line[s:i]))

         tok = self.tokens[0]
         self.tokens = self.tokens[1:]
         self.last = tok
         return tok

 class CParser:
     """The C module parser"""
     def __init__(self, filename, idx = None):
         """Open `filename` and prepare to parse it.

         filename: path of the C source or header to read.
         idx: index receiving the symbols; a new one is created when omitted.
         """
         self.filename = filename
         # Only a name longer than two characters ending in ".h" is a header.
         looks_like_header = len(filename) > 2 and filename[-2:] == '.h'
         self.is_header = 1 if looks_like_header else 0
         self.input = open(filename)
         self.lexer = CLexer(self.input)
         self.index = index() if idx == None else idx
         # Comment tracking state.
         self.top_comment = ""
         self.last_comment = ""
         self.comment = None
         # Behaviour switches and preprocessor state.
         self.collect_ref = 0
         self.no_error = 0
         self.conditionals = []
         self.defines = []

     def collect_references(self):
         self.collect_ref = 1

     def stop_error(self):
         self.no_error = 1

     def start_error(self):
         self.no_error = 0

     def lineno(self):
         return self.lexer.getlineno()

     def index_add(self, name, module, static, type, info=None, extra = None):
         if self.is_header == 1:
             self.index.add(name, module, module, static, type, self.lineno(),
                            info, extra, self.conditionals)
         else:
             self.index.add(name, None, module, static, type, self.lineno(),
                            info, extra, self.conditionals)

     def index_add_ref(self, name, module, static, type, info=None,
                       extra = None):
         if self.is_header == 1:
             self.index.add_ref(name, module, module, static, type,
                                self.lineno(), info, extra, self.conditionals)
         else:
             self.index.add_ref(name, None, module, static, type, self.lineno(),
                                info, extra, self.conditionals)

     def warning(self, msg):
         if self.no_error:
             return
         print(msg)

     def error(self, msg, token=-1):
         if self.no_error:
             return

         print("Parse Error: " + msg)
         if token != -1:
             print("Got token ", token)
         self.lexer.debug()
         sys.exit(1)

     def debug(self, msg, token=-1):
         print("Debug: " + msg)
         if token != -1:
             print("Got token ", token)
         self.lexer.debug()

     def parseTopComment(self, comment):
         res = {}
         lines = comment.split("\n")
         item = None
         for line in lines:
             while line != "" and (line[0] == ' ' or line[0] == '\t'):
                 line = line[1:]
             while line != "" and line[0] == '*':
                 line = line[1:]
             while line != "" and (line[0] == ' ' or line[0] == '\t'):
                 line = line[1:]
             try:
                 (it, line) = line.split(":", 1)
                 item = it
                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
                     line = line[1:]
                 if item in res:
                     res[item] = res[item] + " " + line
                 else:
                     res[item] = line
             except:
                 if item != None:
                     if item in res:
                         res[item] = res[item] + " " + line
                     else:
                         res[item] = line
         self.index.info = res

     def parseComment(self, token):
         if self.top_comment == "":
             self.top_comment = token[1]
         if self.comment == None or token[1][0] == '*':
             self.comment = token[1];
         else:
             self.comment = self.comment + token[1]
         token = self.lexer.token()

         if self.comment.find("DOC_DISABLE") != -1:
             self.stop_error()

         if self.comment.find("DOC_ENABLE") != -1:
             self.start_error()

         return token

     #
     # Parse a comment block associated with a typedef
     #
     def parseTypeComment(self, name, quiet = 0):
         if name[0:2] == '__':
             quiet = 1

         args = []
         desc = ""

         if self.comment == None:
             if not quiet:
                 self.warning("Missing comment for type %s" % (name))
             return((args, desc))
         if self.comment[0] != '*':
             if not quiet:
                 self.warning("Missing * in type comment for %s" % (name))
             return((args, desc))
         lines = self.comment.split('\n')
         if lines[0] == '*':
             del lines[0]
         if lines[0] != "* %s:" % (name):
             if not quiet:
                 self.warning("Misformatted type comment for %s" % (name))
                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
             return((args, desc))
         del lines[0]
         while len(lines) > 0 and lines[0] == '*':
             del lines[0]
         desc = ""
         while len(lines) > 0:
             l = lines[0]
             while len(l) > 0 and l[0] == '*':
                 l = l[1:]
             l = l.strip()
             desc = desc + " " + l
             del lines[0]

         desc = desc.strip()

         if quiet == 0:
             if desc == "":
                 self.warning("Type comment for %s lack description of the macro" % (name))

         return(desc)
     #
     # Parse a comment block associated with a macro
     #
     def parseMacroComment(self, name, quiet = 0):
         if name[0:2] == '__':
             quiet = 1

         args = []
         desc = ""

         if self.comment == None:
             if not quiet:
                 self.warning("Missing comment for macro %s" % (name))
             return((args, desc))
         if self.comment[0] != '*':
             if not quiet:
                 self.warning("Missing * in macro comment for %s" % (name))
             return((args, desc))
         lines = self.comment.split('\n')
         if lines[0] == '*':
             del lines[0]
         if lines[0] != "* %s:" % (name):
             if not quiet:
                 self.warning("Misformatted macro comment for %s" % (name))
                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
             return((args, desc))
         del lines[0]
         while lines[0] == '*':
             del lines[0]
         while len(lines) > 0 and lines[0][0:3] == '* @':
             l = lines[0][3:]
             try:
                 (arg, desc) = l.split(':', 1)
                 desc=desc.strip()
                 arg=arg.strip()
             except:
                 if not quiet:
                     self.warning("Misformatted macro comment for %s" % (name))
                     self.warning("  problem with '%s'" % (lines[0]))
                 del lines[0]
                 continue
             del lines[0]
             l = lines[0].strip()
             while len(l) > 2 and l[0:3] != '* @':
                 while l[0] == '*':
                     l = l[1:]
                 desc = desc + ' ' + l.strip()
                 del lines[0]
                 if len(lines) == 0:
                     break
                 l = lines[0]
             args.append((arg, desc))
         while len(lines) > 0 and lines[0] == '*':
             del lines[0]
         desc = ""
         while len(lines) > 0:
             l = lines[0]
             while len(l) > 0 and l[0] == '*':
                 l = l[1:]
             l = l.strip()
             desc = desc + " " + l
             del lines[0]

         desc = desc.strip()

         if quiet == 0:
             if desc == "":
                 self.warning("Macro comment for %s lack description of the macro" % (name))

         return((args, desc))

      #
      # Parse a comment block and merge the information found in the
      # parameters descriptions, finally returns a block as complete
      # as possible
      #
     def mergeFunctionComment(self, name, description, quiet = 0):
         if name == 'main':
             quiet = 1
         if name[0:2] == '__':
             quiet = 1

         (ret, args) = description
         desc = ""
         retdesc = ""

         if self.comment == None:
             if not quiet:
                 self.warning("Missing comment for function %s" % (name))
             return(((ret[0], retdesc), args, desc))
         if self.comment[0] != '*':
             if not quiet:
                 self.warning("Missing * in function comment for %s" % (name))
             return(((ret[0], retdesc), args, desc))
         lines = self.comment.split('\n')
         if lines[0] == '*':
             del lines[0]
         if lines[0] != "* %s:" % (name):
             if not quiet:
                 self.warning("Misformatted function comment for %s" % (name))
                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
             return(((ret[0], retdesc), args, desc))
         del lines[0]
         while lines[0] == '*':
             del lines[0]
         nbargs = len(args)
         while len(lines) > 0 and lines[0][0:3] == '* @':
             l = lines[0][3:]
             try:
                 (arg, desc) = l.split(':', 1)
                 desc=desc.strip()
                 arg=arg.strip()
             except:
                 if not quiet:
                     self.warning("Misformatted function comment for %s" % (name))
                     self.warning("  problem with '%s'" % (lines[0]))
                 del lines[0]
                 continue
             del lines[0]
             l = lines[0].strip()
             while len(l) > 2 and l[0:3] != '* @':
                 while l[0] == '*':
                     l = l[1:]
                 desc = desc + ' ' + l.strip()
                 del lines[0]
                 if len(lines) == 0:
                     break
                 l = lines[0]
             i = 0
             while i < nbargs:
                 if args[i][1] == arg:
                     args[i] = (args[i][0], arg, desc)
                     break;
                 i = i + 1
             if i >= nbargs:
                 if not quiet:
                     self.warning("Unable to find arg %s from function comment for %s" % (
                        arg, name))
         while len(lines) > 0 and lines[0] == '*':
             del lines[0]
         desc = ""
         while len(lines) > 0:
             l = lines[0]
             while len(l) > 0 and l[0] == '*':
                 l = l[1:]
             l = l.strip()
             if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
                 try:
                     l = l.split(' ', 1)[1]
                 except:
                     l = ""
                 retdesc = l.strip()
                 del lines[0]
                 while len(lines) > 0:
                     l = lines[0]
                     while len(l) > 0 and l[0] == '*':
                         l = l[1:]
                     l = l.strip()
                     retdesc = retdesc + " " + l
                     del lines[0]
             else:
                 desc = desc + " " + l
                 del lines[0]

         retdesc = retdesc.strip()
         desc = desc.strip()

         if quiet == 0:
              #
              # report missing comments
              #
             i = 0
             while i < nbargs:
                 if args[i][2] == None and args[i][0] != "void" and \
                    ((args[i][1] != None) or (args[i][1] == '')):
                     self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
                 i = i + 1
             if retdesc == "" and ret[0] != "void":
                 self.warning("Function comment for %s lacks description of return value" % (name))
             if desc == "":
                 self.warning("Function comment for %s lacks description of the function" % (name))

         return(((ret[0], retdesc), args, desc))

     def parsePreproc(self, token):
         """Process one preprocessor directive and return the next token.

         token is the ('preproc', word) tuple holding the directive word
         ('#include', '#define', '#ifdef', ...).  Includes and macro
         definitions are recorded with index_add(); conditional directives
         maintain the self.defines and self.conditionals stacks.

         Returns the first token that follows the directive, or None when
         the lexer has no more tokens.
         """
         if debug:
             print("=> preproc ", token, self.lexer.tokens)
         name = token[1]
         if name == "#include":
             token = self.lexer.token()
             if token is None:
                 return None
             if token[0] == 'preproc':
                 self.index_add(token[1], self.filename, not self.is_header,
                                 "include")
                 return self.lexer.token()
             return token
         if name == "#define":
             token = self.lexer.token()
             if token is None:
                 return None
             if token[0] != 'preproc':
                 # Malformed "#define" with no macro name: hand the token
                 # back to the caller (as the #include branch does) instead
                 # of silently discarding it.
                 return token
             # TODO macros with arguments
             name = token[1]
             # Skip the macro body: every following preproc word up to the
             # next directive.
             token = self.lexer.token()
             while token is not None and token[0] == 'preproc' and \
                   not token[1].startswith('#'):
                 token = self.lexer.token()
             # Only the part before any '(' is indexed as the macro name.
             name = name.split('(')[0]
             info = self.parseMacroComment(name, not self.is_header)
             self.index_add(name, self.filename, not self.is_header,
                             "macro", info)
             return token

         #
         # Processing of conditionals modified by Bill 1/1/05
         #
         # We process conditionals (i.e. tokens from #ifdef, #ifndef,
         # #if, #else and #endif) for headers and mainline code,
         # store the ones from the header in libxml2-api.xml, and later
         # (in the routine merge_public) verify that the two (header and
         # mainline code) agree.
         #
         # There is a small problem with processing the headers. Some of
         # the variables are not concerned with enabling / disabling of
         # library functions (e.g. '__XML_PARSER_H__'), and we don't want
         # them to be included in libxml2-api.xml, or involved in
         # the check between the header and the mainline code.  To
         # accomplish this, we ignore any conditional which doesn't include
         # the string 'ENABLED'
         #
         if name == "#ifdef" or name == "#ifndef":
             # A directive with nothing pending after it used to raise
             # IndexError; record an empty symbol instead so that the
             # matching #endif still pops a balanced entry.
             pending = self.lexer.tokens
             if len(pending) > 0:
                 apstr = pending[0][1]
             else:
                 apstr = ""
             self.defines.append(apstr)
             if 'ENABLED' in apstr:
                 if name == "#ifdef":
                     self.conditionals.append("defined(%s)" % apstr)
                 else:
                     self.conditionals.append("!defined(%s)" % apstr)
         elif name == "#if":
             # The condition is the space-joined text of the pending tokens.
             apstr = " ".join([tok[1] for tok in self.lexer.tokens])
             self.defines.append(apstr)
             if 'ENABLED' in apstr:
                 self.conditionals.append(apstr)
         elif name == "#else":
             if self.conditionals != [] and self.defines != [] and \
                'ENABLED' in self.defines[-1]:
                 self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
         elif name == "#endif":
             if self.conditionals != [] and self.defines != [] and \
                'ENABLED' in self.defines[-1]:
                 self.conditionals = self.conditionals[:-1]
             self.defines = self.defines[:-1]
         # Drop the remaining words of the directive.
         token = self.lexer.token()
         while token is not None and token[0] == 'preproc' and \
               not token[1].startswith('#'):
             token = self.lexer.token()
         return token

      #
      # Token acquisition on top of the lexer: it handles preprocessor
      # directives and comments internally, since they are logically not
      # part of the program structure.
      #
     def token(self):
         """Return the next significant token, or None at end of input.

         Comments and preprocessor directives are handed to parseComment()
         and parsePreproc(); '__const' is reported as 'const'; an
         '__attribute' is skipped up to the next ';' token, which is
         returned; names listed in ignored_words are dropped together with
         the number of following tokens given by their table entry.
         """
         tok = self.lexer.token()
         while tok is not None:
             kind = tok[0]
             if kind == 'comment':
                 tok = self.parseComment(tok)
             elif kind == 'preproc':
                 tok = self.parsePreproc(tok)
             elif kind != "name":
                 break
             elif tok[1] == "__const":
                 return ("name", "const")
             elif tok[1] == "__attribute":
                 tok = self.lexer.token()
                 while tok is not None and tok[1] != ";":
                     tok = self.lexer.token()
                 return tok
             elif tok[1] in ignored_words:
                 (extra, _reason) = ignored_words[tok[1]]
                 # discard the tokens attached to the ignored word, then
                 # resume with the one that follows them
                 for _ in range(extra):
                     self.lexer.token()
                 tok = self.lexer.token()
             else:
                 break
         if tok is None:
             return None
         if debug:
             print("=> ", tok)
         return tok

      #
      # Parse a typedef, it records the type and its name.
      #
     def parseTypedef(self, token):
         """Parse the declarators of a typedef and index each declared name.

         token is the first token after the 'typedef' keyword.  Each name is
         recorded through index_add() as a "functype", a "struct" or a
         "typedef".  Returns the token following the closing ';', the
         offending token after a reported error, or None.
         """
         if token is None:
             return None
         token = self.parseType(token)
         if token is None:
             self.error("parsing typedef")
             return None
         base_type = self.type
         declared = base_type
         while token is not None:
             if token[0] != "name":
                 self.error("parsing typedef: expecting a name")
                 return token

             name = token[1]
             signature = self.signature
             if signature is not None:
                 # function type: keep only what precedes the '('
                 declared = declared.split('(')[0]
                 merged = self.mergeFunctionComment(
                         name, ((declared, None), signature), 1)
                 self.index_add(name, self.filename, not self.is_header,
                                 "functype", merged)
             elif base_type == "struct":
                 self.index_add(name, self.filename, not self.is_header,
                                 "struct", declared)
                 base_type = "struct " + name
             else:
                 # TODO report missing or misformatted comments
                 info = self.parseTypeComment(name, 1)
                 self.index_add(name, self.filename, not self.is_header,
                             "typedef", declared, info)
             token = self.token()

             is_sep = token is not None and token[0] == 'sep'
             if is_sep and token[1] == ',':
                 # next declarator: restart from the base type and collect
                 # its leading operators
                 declared = base_type
                 token = self.token()
                 while token is not None and token[0] == "op":
                     declared = declared + token[1]
                     token = self.token()
             elif is_sep and token[1] == ';':
                 break
             elif token is not None and token[0] == 'name':
                 declared = base_type
             else:
                 self.error("parsing typedef: expecting ';'", token)
                 return token
         return self.token()

      #
      # Parse a C code block (used for function bodies): it parses up to
      # and including the balancing }
      #
     def parseBlock(self, token):
         """Skip a C code block up to and including its balancing '}'.

         token is the first token inside the block (the opening '{' has
         already been consumed).  Nested blocks are handled by recursion.
         When self.collect_ref == 1, identifiers met on the way are recorded
         with index_add_ref(): an 'xml*' name followed by '(' as a
         "function", an 'xml*' name followed by another name and then ';',
         ',' or '=' as a "type", and 'XML_*' / 'LIBXML_*' names as
         "typedef".

         Returns the token following the closing '}', or None if the input
         ends first.
         """
         while token is not None:
             if token[0] == "sep" and token[1] == "{":
                 token = self.token()
                 token = self.parseBlock(token)
             elif token[0] == "sep" and token[1] == "}":
                 self.comment = None
                 token = self.token()
                 return token
             elif self.collect_ref == 1:
                 oldtok = token
                 token = self.token()
                 if oldtok[0] != "name":
                     continue
                 ident = oldtok[1]
                 if ident[0:3] == "xml":
                     # The look-ahead token is None when the input ends
                     # right after the identifier; it must not be indexed.
                     if token is None:
                         continue
                     if token[0] == "sep" and token[1] == "(":
                         self.index_add_ref(ident, self.filename,
                                             0, "function")
                         token = self.token()
                     elif token[0] == "name":
                         token = self.token()
                         if token is not None and token[0] == "sep" and \
                            token[1] in (";", ",", "="):
                             self.index_add_ref(ident, self.filename,
                                                 0, "type")
                 elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                     self.index_add_ref(ident, self.filename,
                                         0, "typedef")
             else:
                 token = self.token()
         return token

      #
      # Parse a C struct definition till the balancing }
      #
     def parseStruct(self, token):
         """Parse the fields of a C struct up to the balancing '}'.

         token is the first token inside the struct body.  Every
         "type name;" member is collected as a (type, name, comment) tuple,
         where comment is whatever self.comment holds after the token
         following the ';' has been read.  The list is stored in
         self.struct_fields, and self.type is restored to its value on
         entry after each member.

         Returns the token following the closing '}', or None if the input
         ends first.
         """
         fields = []
         while token is not None:
             if token[0] == "sep" and token[1] == "{":
                 token = self.token()
                 token = self.parseTypeBlock(token)
             elif token[0] == "sep" and token[1] == "}":
                 self.struct_fields = fields
                 token = self.token()
                 return token
             else:
                 base_type = self.type
                 token = self.parseType(token)
                 if token is not None and token[0] == "name":
                     fname = token[1]
                     token = self.token()
                     # token is None when the input stops after the field
                     # name; report it rather than subscripting None.
                     if token is not None and token[0] == "sep" and \
                        token[1] == ";":
                         # Clear the comment first so that only a comment
                         # read while fetching the next token is attached
                         # to this field.
                         self.comment = None
                         token = self.token()
                         fields.append((self.type, fname, self.comment))
                         self.comment = None
                     else:
                         self.error("parseStruct: expecting ;", token)
                 elif token is not None and token[0] == "sep" and \
                      token[1] == "{":
                     # nested anonymous block: skip it and its optional name
                     token = self.token()
                     token = self.parseTypeBlock(token)
                     if token is not None and token[0] == "name":
                         token = self.token()
                     if token is not None and token[0] == "sep" and \
                        token[1] == ";":
                         token = self.token()
                     else:
                         self.error("parseStruct: expecting ;", token)
                 else:
                     self.error("parseStruct: name", token)
                     token = self.token()
                 self.type = base_type
         self.struct_fields = fields
         return token

      #
      # Parse a C enum block, parse till the balancing }
      #
     def parseEnumBlock(self, token):
         """Parse the body of a C enum up to the balancing '}'.

         token is the first token inside the enum body.  self.enums is
         rebuilt as a list of (name, value, comment) tuples.  value is the
         text of the explicit initializer when there is one, otherwise the
         previous value plus one (starting at 0, as C specifies), or "" when
         the previous value is not a plain decimal integer.

         Returns the token following the closing '}', or None if the input
         ends first.
         """
         self.enums = []
         name = None
         self.comment = None
         comment = ""
         # Start one below zero so that the first enumerator without an
         # initializer is numbered 0 after the increment.
         value = "-1"
         while token is not None:
             if token[0] == "sep" and token[1] == "{":
                 token = self.token()
                 token = self.parseTypeBlock(token)
             elif token[0] == "sep" and token[1] == "}":
                 if name is not None:
                     if self.comment is not None:
                         comment = self.comment.strip()
                         self.comment = None
                     self.enums.append((name, value, comment))
                 token = self.token()
                 return token
             elif token[0] == "name":
                 # A new name completes the previous enumerator: any comment
                 # seen since then belongs to it.
                 if name is not None:
                     if self.comment is not None:
                         comment = self.comment.strip()
                         self.comment = None
                     self.enums.append((name, value, comment))
                 name = token[1]
                 comment = ""
                 token = self.token()
                 if token is not None and token[0] == "op" and \
                    token[1][0] == "=":
                     # Explicit initializer: concatenate its tokens up to
                     # the next ',' or '}'.
                     value = ""
                     if len(token[1]) > 1:
                         value = token[1][1:]
                     token = self.token()
                     while token is not None and (token[0] != "sep" or
                           (token[1] != ',' and token[1] != '}')):
                         value = value + token[1]
                         token = self.token()
                 else:
                     try:
                         value = "%d" % (int(value) + 1)
                     except ValueError:
                         self.warning("Failed to compute value of enum %s" % (name))
                         value = ""
                 if token is not None and token[0] == "sep" and \
                    token[1] == ",":
                     token = self.token()
             else:
                 token = self.token()
         return token

      #
      # Parse a C definition block (used for structs): it parses up to
      # and including the balancing }
      #
     def parseTypeBlock(self, token):
         """Skip tokens up to and including the '}' that balances the block.

         token is the first token inside the block.  Nested '{' ... '}'
         pairs are tracked with a depth counter.  Returns the token that
         follows the balancing '}', or None if the input ends first.
         """
         depth = 0
         while token is not None:
             is_sep = token[0] == "sep"
             if is_sep and token[1] == "{":
                 depth = depth + 1
                 token = self.token()
             elif is_sep and token[1] == "}":
                 token = self.token()
                 if depth == 0:
                     return token
                 depth = depth - 1
             else:
                 token = self.token()
         return token

      #
      # Parse a type: the fact that the type name can either occur after
      #    the definition or within the definition makes it a little harder
      #    if inside, the name token is pushed back before returning
      #
     def parseType(self, token):
         """Parse a C type starting at token.

         Resets and then fills self.type (textual form of the type),
         self.struct_fields (set while parsing a struct body) and
         self.signature (parameter list when the type is a function type).
         Enumerators met in an enum body are indexed with index_add().

         When a name is found while looking ahead (struct tag, function
         pointer name, or the name in front of an array or bit-field), the
         token read after it is pushed back on the lexer and the name token
         is returned.  Otherwise the token following the type is returned;
         None means the input ended.
         """
         self.type = ""
         self.struct_fields = []
         self.signature = None
         if token is None:
             return token

         def add_word(word):
             # Append one space-separated word to the type being built.
             if self.type == "":
                 self.type = word
             else:
                 self.type = self.type + " " + word

         while token is not None and token[0] == "name" and \
               token[1] in ("const", "unsigned", "signed"):
             add_word(token[1])
             token = self.token()
         if token is None:
             return token

         if token[0] == "name" and token[1] in ("long", "short"):
             add_word(token[1])
             # Absorb the remaining keywords of a multi-word arithmetic
             # type ("long int", "long long", "short int", "long double")
             # so that they are not mistaken for the declared name.
             ahead = self.token()
             while ahead is not None and ahead[0] == "name" and \
                   ahead[1] in ("long", "int", "double"):
                 add_word(ahead[1])
                 ahead = self.token()
             if ahead is not None:
                 self.lexer.push(ahead)

         elif token[0] == "name" and token[1] == "struct":
             add_word(token[1])
             token = self.token()
             nametok = None
             if token is not None and token[0] == "name":
                 nametok = token
                 token = self.token()
             if token is not None and token[0] == "sep" and token[1] == "{":
                 token = self.token()
                 token = self.parseStruct(token)
             elif token is not None and token[0] == "op" and token[1] == "*":
                 if nametok is None:
                     # "struct *" without a tag cannot be named
                     self.error("struct : expecting name", token)
                     return token
                 self.type = self.type + " " + nametok[1] + " *"
                 token = self.token()
                 while token is not None and token[0] == "op" and token[1] == "*":
                     self.type = self.type + " *"
                     token = self.token()
                 if token is not None and token[0] == "name":
                     nametok = token
                     token = self.token()
                 else:
                     self.error("struct : expecting name", token)
                     return token
             elif token is not None and token[0] == "name" and nametok is not None:
                 self.type = self.type + " " + nametok[1]
                 return token

             if nametok is not None:
                 self.lexer.push(token)
                 token = nametok
             return token

         elif token[0] == "name" and token[1] == "enum":
             add_word(token[1])
             self.enums = []
             token = self.token()
             if token is not None and token[0] == "sep" and token[1] == "{":
                 token = self.token()
                 token = self.parseEnumBlock(token)
             else:
                 self.error("parsing enum: expecting '{'", token)
             enum_type = None
             if token is not None and token[0] != "name":
                 self.lexer.push(token)
                 token = ("name", "enum")
             elif token is not None:
                 enum_type = token[1]
             for enum in self.enums:
                 self.index_add(enum[0], self.filename,
                                not self.is_header, "enum",
                                (enum[1], enum[2], enum_type))
             return token

         elif token[0] == "name":
             add_word(token[1])
         else:
             self.error("parsing type %s: expecting a name" % (self.type),
                        token)
             return token
         token = self.token()
         while token is not None and (token[0] == "op" or
               token[0] == "name" and token[1] == "const"):
             self.type = self.type + " " + token[1]
             token = self.token()

         #
         # if there is a parenthesis here, this means a function type
         #
         if token is not None and token[0] == "sep" and token[1] == '(':
             self.type = self.type + token[1]
             token = self.token()
             while token is not None and token[0] == "op" and token[1] == '*':
                 self.type = self.type + token[1]
                 token = self.token()
             if token is None or token[0] != "name":
                 self.error("parsing function type, name expected", token)
                 return token
             self.type = self.type + token[1]
             nametok = token
             token = self.token()
             if token is not None and token[0] == "sep" and token[1] == ')':
                 self.type = self.type + token[1]
                 token = self.token()
                 if token is not None and token[0] == "sep" and token[1] == '(':
                     token = self.token()
                     # parseSignature() parses each parameter type through
                     # this method, which overwrites self.type.
                     saved_type = self.type
                     token = self.parseSignature(token)
                     self.type = saved_type
                 else:
                     self.error("parsing function type, '(' expected", token)
                     return token
             else:
                 self.error("parsing function type, ')' expected", token)
                 return token
             self.lexer.push(token)
             token = nametok
             return token

         #
         # do some lookahead for arrays
         #
         if token is not None and token[0] == "name":
             nametok = token
             token = self.token()
             if token is not None and token[0] == "sep" and token[1] == '[':
                 self.type = self.type + nametok[1]
                 while token is not None and token[0] == "sep" and token[1] == '[':
                     self.type = self.type + token[1]
                     token = self.token()
                     while token is not None and token[0] != 'sep' and \
                           token[1] != ']' and token[1] != ';':
                         self.type = self.type + token[1]
                         token = self.token()
                 if token is not None and token[0] == 'sep' and token[1] == ']':
                     self.type = self.type + token[1]
                     token = self.token()
                 else:
                     self.error("parsing array type, ']' expected", token)
                     return token
             elif token is not None and token[0] == "sep" and token[1] == ':':
                 # remove :12 in case it's a limited int size
                 token = self.token()
                 token = self.token()
             self.lexer.push(token)
             token = nametok

         return token

      #
      # Parse a signature: '(' has been parsed and we scan the type definition
      #    up to the ')' included
     def parseSignature(self, token):
         """Parse a parameter list; the opening '(' is already consumed.

         Stores the list of (type, name, None) tuples in self.signature and
         returns the token following the closing ')'.  A parameter given by
         its type only gets None as name, except the type "..." whose name
         is "...".
         """
         params = []
         if token is not None and token[0] == "sep" and token[1] == ')':
             self.signature = []
             return self.token()
         while token is not None:
             token = self.parseType(token)
             if token is None:
                 break
             kind = token[0]
             text = token[1]
             if kind == "name":
                 params.append((self.type, text, None))
                 token = self.token()
             elif kind == "sep" and text == ',':
                 token = self.token()
                 continue
             elif kind == "sep" and text == ')':
                 # only the type was provided
                 pname = None
                 if self.type == "...":
                     pname = "..."
                 params.append((self.type, pname, None))
             if token is not None and token[0] == "sep":
                 if token[1] == ',':
                     token = self.token()
                     continue
                 if token[1] == ')':
                     token = self.token()
                     break
         self.signature = params
         return token

      #
      # Parse a global definition, be it a type, variable or function
      # the extern "C" blocks are a bit nasty and require it to recurse.
      #
     def parseGlobal(self, token):
         """Parse one top-level definition starting at token.

         Handles, in order: an 'extern "C" { ... }' block (by calling itself
         for each definition inside it), a 'static' prefix, a 'typedef'
         (delegated to parseTypedef), and otherwise a type followed by one
         or more variable declarators or a function prototype/definition.
         Variables, structs and functions are recorded with index_add().

         Returns the token following what was consumed; this is the
         unconsumed token itself when no declared name follows the type.
         May return None at end of input.
         """
         # becomes 1 when the definition carries the 'static' keyword
         static = 0
         if token[1] == 'extern':
             token = self.token()
             if token == None:
                 return token
             if token[0] == 'string':
                 if token[1] == 'C':
                     token = self.token()
                     if token == None:
                         return token
                     if token[0] == 'sep' and token[1] == "{":
                         token = self.token()
 #                         print 'Entering extern "C line ', self.lineno()
                         # parse every definition up to the closing '}'
                         while token != None and (token[0] != 'sep' or
                               token[1] != "}"):
                             if token[0] == 'name':
                                 token = self.parseGlobal(token)
                             else:
                                 self.error(
                                  "token %s %s unexpected at the top level" % (
                                         token[0], token[1]))
                                 token = self.parseGlobal(token)
 #                         print 'Exiting extern "C" line', self.lineno()
                         token = self.token()
                         return token
                 else:
                     # extern followed by a string other than "C": the
                     # string token is handed back untouched
                     return token
         elif token[1] == 'static':
             static = 1
             token = self.token()
             if token == None or  token[0] != 'name':
                 return token

         if token[1] == 'typedef':
             token = self.token()
             return self.parseTypedef(token)
         else:
             token = self.parseType(token)
             # remembered so that each declarator after a ',' restarts from
             # the type as parsed
             type_orig = self.type
         if token == None or token[0] != "name":
             return token
         type = type_orig
         self.name = token[1]
         token = self.token()
         while token != None and (token[0] == "sep" or token[0] == "op"):
             if token[0] == "sep":
                 if token[1] == "[":
                     # array declarator: everything up to the ';' is
                     # appended to the type text
                     type = type + token[1]
                     token = self.token()
                     while token != None and (token[0] != "sep" or \
                           token[1] != ";"):
                         type = type + token[1]
                         token = self.token()

             if token != None and token[0] == "op" and token[1] == "=":
                  #
                  # Skip the initialization of the variable
                  #
                 token = self.token()
                 if token[0] == 'sep' and token[1] == '{':
                     token = self.token()
                     token = self.parseBlock(token)
                 else:
                     self.comment = None
                     while token != None and (token[0] != "sep" or \
                           (token[1] != ';' and token[1] != ',')):
                             token = self.token()
                 self.comment = None
                 if token == None or token[0] != "sep" or (token[1] != ';' and
                    token[1] != ','):
                     self.error("missing ';' or ',' after value")

             if token != None and token[0] == "sep":
                 if token[1] == ";":
                     # end of a variable (or struct) declaration
                     self.comment = None
                     token = self.token()
                     if type == "struct":
                         self.index_add(self.name, self.filename,
                              not self.is_header, "struct", self.struct_fields)
                     else:
                         self.index_add(self.name, self.filename,
                              not self.is_header, "variable", type)
                     break
                 elif token[1] == "(":
                     # function: parse the parameter list, then tell a
                     # prototype (';') from a definition ('{')
                     token = self.token()
                     token = self.parseSignature(token)
                     if token == None:
                         return None
                     if token[0] == "sep" and token[1] == ";":
                         # prototype: third argument of mergeFunctionComment
                         # is always 1 here (presumably the quiet flag --
                         # confirm against its definition)
                         d = self.mergeFunctionComment(self.name,
                                 ((type, None), self.signature), 1)
                         self.index_add(self.name, self.filename, static,
                                         "function", d)
                         token = self.token()
                     elif token[0] == "sep" and token[1] == "{":
                         # definition: index it, then skip the body
                         d = self.mergeFunctionComment(self.name,
                                 ((type, None), self.signature), static)
                         self.index_add(self.name, self.filename, static,
                                         "function", d)
                         token = self.token()
                         token = self.parseBlock(token);
                 elif token[1] == ',':
                     # several variables in one declaration: record the
                     # current one and start the next declarator
                     self.comment = None
                     self.index_add(self.name, self.filename, static,
                                     "variable", type)
                     type = type_orig
                     token = self.token()
                     while token != None and token[0] == "sep":
                         type = type + token[1]
                         token = self.token()
                     if token != None and token[0] == "name":
                         self.name = token[1]
                         token = self.token()
                 else:
                     break

         return token

     def parse(self):
         """Parse the whole file and return its index.

         Each top-level definition is handed to parseGlobal().  When a
         token that is not a name shows up at the top level, the error is
         reported, parseGlobal() is invoked once more on that token and the
         method returns None without processing the top comment.
         """
         self.warning("Parsing %s" % (self.filename))
         current = self.token()
         while current is not None:
             if current[0] != 'name':
                 self.error("token %s %s unexpected at the top level" % (
                        current[0], current[1]))
                 self.parseGlobal(current)
                 return None
             current = self.parseGlobal(current)
         self.parseTopComment(self.top_comment)
         return self.index


 class docBuilder:
     """A documentation builder"""
     def __init__(self, name, directories=['.'], excludes=[]):
         """Set up an empty builder for project `name`.

         directories lists the directories that scan() searches for *.c and
         *.h files; excludes lists substrings of file paths to skip, to
         which the keys of ignored_files are always added.
         """
         self.name = name
         # Copy the list: the default value is a single object shared by
         # every call, so storing it directly would let a change made
         # through one instance leak into all later ones.
         self.directories = list(directories)
         self.excludes = excludes + list(ignored_files.keys())
         self.modules = {}
         self.headers = {}
         self.idx = index()
         self.xref = {}
         self.index = {}
         # the libxml2 project uses 'libxml' as its base name
         if name == 'libxml2':
             self.basename = 'libxml'
         else:
             self.basename = name

     def indexString(self, id, str):
         """Add `id` to the cross-reference list of every word found in `str`.

         Punctuation is turned into spaces, then each whitespace-separated
         word that starts with an ASCII letter, has at least three
         characters and is not 'and' or 'the' (in any letter case) is used
         as a key of self.xref.  Does nothing when `str` is None.
         """
         if str is None:
             return
         for mark in "'\"/*[]()<>&#,.;":
             str = str.replace(mark, ' ')
         for word in str.split():
             try:
                 if word[0] not in string.ascii_letters:
                     continue
                 if len(word) < 3:
                     continue
                 # TODO: generalize this a bit
                 if word.lower() in ('and', 'the'):
                     continue
                 self.xref.setdefault(word, []).append(id)
             except:
                 # best effort: a word that cannot be indexed is skipped
                 pass

     def analyze(self):
         print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
         self.idx.analyze()

     def scanHeaders(self):
         for header in list(self.headers.keys()):
             parser = CParser(header)
             idx = parser.parse()
             self.headers[header] = idx;
             self.idx.merge(idx)

     def scanModules(self):
         for module in list(self.modules.keys()):
             parser = CParser(module)
             idx = parser.parse()
             # idx.analyze()
             self.modules[module] = idx
             self.idx.merge_public(idx)

     def scan(self):
         for directory in self.directories:
             files = glob.glob(directory + "/*.c")
             for file in files:
                 skip = 0
                 for excl in self.excludes:
                     if file.find(excl) != -1:
                         print("Skipping %s" % file)
                         skip = 1
                         break
                 if skip == 0:
                     self.modules[file] = None;
             files = glob.glob(directory + "/*.h")
             for file in files:
                 skip = 0
                 for excl in self.excludes:
                     if file.find(excl) != -1:
                         print("Skipping %s" % file)
                         skip = 1
                         break
                 if skip == 0:
                     self.headers[file] = None;
         self.scanHeaders()
         self.scanModules()

     def modulename_file(self, file):
         module = os.path.basename(file)
         if module[-2:] == '.h':
             module = module[:-2]
         elif module[-2:] == '.c':
             module = module[:-2]
         return module

     def serialize_enum(self, output, name):
         id = self.idx.enums[name]
         output.write("    <enum name='%s' file='%s'" % (name,
                      self.modulename_file(id.header)))
         if id.info != None:
             info = id.info
             if info[0] != None and info[0] != '':
                 try:
                     val = eval(info[0])
                 except:
                     val = info[0]
                 output.write(" value='%s'" % (val));
             if info[2] != None and info[2] != '':
                 output.write(" type='%s'" % info[2]);
             if info[1] != None and info[1] != '':
                 output.write(" info='%s'" % escape(info[1]));
         output.write("/>\n")

     def serialize_macro(self, output, name):
         id = self.idx.macros[name]
         output.write("    <macro name='%s' file='%s'>\n" % (name,
                      self.modulename_file(id.header)))
         if id.info != None:
             try:
                 (args, desc) = id.info
                 if desc != None and desc != "":
                     output.write("      <info>%s</info>\n" % (escape(desc)))
                     self.indexString(name, desc)
                 for arg in args:
                     (name, desc) = arg
                     if desc != None and desc != "":
                         output.write("      <arg name='%s' info='%s'/>\n" % (
                                      name, escape(desc)))
                         self.indexString(name, desc)
                     else:
                         output.write("      <arg name='%s'/>\n" % (name))
             except:
                 pass
         output.write("    </macro>\n")

     def serialize_typedef(self, output, name):
         id = self.idx.typedefs[name]
         if id.info[0:7] == 'struct ':
             output.write("    <struct name='%s' file='%s' type='%s'" % (
                      name, self.modulename_file(id.header), id.info))
             name = id.info[7:]
             if name in self.idx.structs and ( \
                type(self.idx.structs[name].info) == type(()) or
                 type(self.idx.structs[name].info) == type([])):
                 output.write(">\n");
                 try:
                     for field in self.idx.structs[name].info:
                         desc = field[2]
                         self.indexString(name, desc)
                         if desc == None:
                             desc = ''
                         else:
                             desc = escape(desc)
                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                 except:
                     print("Failed to serialize struct %s" % (name))
                 output.write("    </struct>\n")
             else:
                 output.write("/>\n");
         else :
             output.write("    <typedef name='%s' file='%s' type='%s'" % (
                          name, self.modulename_file(id.header), id.info))
             try:
                 desc = id.extra
                 if desc != None and desc != "":
                     output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                     output.write("    </typedef>\n")
                 else:
                     output.write("/>\n")
             except:
                 output.write("/>\n")

     def serialize_variable(self, output, name):
         id = self.idx.variables[name]
         if id.info != None:
             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                     name, self.modulename_file(id.header), id.info))
         else:
             output.write("    <variable name='%s' file='%s'/>\n" % (
                     name, self.modulename_file(id.header)))

     def serialize_function(self, output, name):
         """Write the element describing the entry `name` of
         self.idx.functions.

         The tag of the element is the type of the entry. The element
         holds the optional conditionals, the description, the return
         value and the arguments; the descriptions are also indexed with
         indexString(). A failure while processing the info is reported
         on stdout and the element is closed anyway.
         """
         entry = self.idx.functions[name]
         if name == debugsym:
             print("=>", entry)

         output.write("    <%s name='%s' file='%s' module='%s'>\n" % (
                      entry.type, name,
                      self.modulename_file(entry.header),
                      self.modulename_file(entry.module)))
         #
         # Processing of conditionals modified by Bill 1/1/05
         #
         if entry.conditionals is not None:
             joined = ""
             for cond in entry.conditionals:
                 # no separator as long as nothing was accumulated
                 if joined:
                     joined += " &amp;&amp; "
                 joined += cond
             output.write("      <cond>%s</cond>\n"% (joined))
         try:
             (ret, params, desc) = entry.info
             undocumented = desc is None or desc == ''
             if undocumented and not name.startswith("xmlThrDef") and \
                name != "xmlDllMain":
                 print("%s %s from %s has no description" % (entry.type, name,
                        self.modulename_file(entry.module)))

             output.write("      <info>%s</info>\n" % (escape(desc)))
             self.indexString(name, desc)
             ret_type = ret[0]
             if ret_type == "void":
                 output.write("      <return type='void'/>\n")
             elif ret_type is not None:
                 output.write("      <return type='%s' info='%s'/>\n" % (
                          ret_type, escape(ret[1])))
                 self.indexString(name, ret[1])
             for param in params:
                 if param[0] == 'void':
                     continue
                 comment = param[2]
                 if comment is None:
                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                 else:
                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(comment)))
                     self.indexString(name, comment)
         except:
             print("Failed to save function %s info: " % name, repr(entry.info))
         output.write("    </%s>\n" % (entry.type))

     def serialize_exports(self, output, file):
         """Write the <file> element listing what the header `file`
         exports.

         The Summary, Description and Author of the header are written
         first, a missing one being reported on stdout, followed by a
         <deprecated/> marker when the description contains DEPRECATED.
         The symbols are then listed sorted, by kind: macros, enums,
         typedefs, structs, variables and functions.
         """
         module = self.modulename_file(file)
         output.write("    <file name='%s'>\n" % (module))
         header_idx = self.headers[file]
         if header_idx.info is not None:
             for section in ('Summary', 'Description', 'Author'):
                 tag = section.lower()
                 try:
                     output.write("     <%s>%s</%s>\n" % (
                                  tag, escape(header_idx.info[section]), tag))
                 except:
                     print("Header %s lacks a %s description" % (module, section))
             if 'Description' in header_idx.info and \
                header_idx.info['Description'].find("DEPRECATED") >= 0:
                 output.write("     <deprecated/>\n")

         # Macros are sometime used to masquerade other types.
         masquerading = ('functions', 'variables', 'typedefs', 'structs',
                         'enums')
         for symbol in uniq(sorted(header_idx.macros.keys())):
             if any(symbol in getattr(header_idx, kind)
                    for kind in masquerading):
                 continue
             output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))

         for kind, line in (
                 ('enums', "     <exports symbol='%s' type='enum'/>\n"),
                 ('typedefs', "     <exports symbol='%s' type='typedef'/>\n"),
                 ('structs', "     <exports symbol='%s' type='struct'/>\n"),
                 ('variables', "     <exports symbol='%s' type='variable'/>\n"),
                 ('functions', "     <exports symbol='%s' type='function'/>\n")):
             for symbol in uniq(sorted(getattr(header_idx, kind).keys())):
                 output.write(line % (symbol))
         output.write("    </file>\n")

     def serialize_xrefs_files(self, output):
         headers = list(self.headers.keys())
         headers.sort()
         for file in headers:
             module = self.modulename_file(file)
             output.write("    <file name='%s'>\n" % (module))
             dict = self.headers[file]
             ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
                   list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
                   list(dict.structs.keys()) + list(dict.enums.keys()))
             ids.sort()
             for id in ids:
                 output.write("      <ref name='%s'/>\n" % (id))
             output.write("    </file>\n")
         pass

     def serialize_xrefs_functions(self, output):
         funcs = {}
         for name in list(self.idx.functions.keys()):
             id = self.idx.functions[name]
             try:
                 (ret, params, desc) = id.info
                 for param in params:
                     if param[0] == 'void':
                         continue
                     if param[0] in funcs:
                         funcs[param[0]].append(name)
                     else:
                         funcs[param[0]] = [name]
             except:
                 pass
         typ = list(funcs.keys())
         typ.sort()
         for type in typ:
             if type == '' or type == 'void' or type == "int" or \
                type == "char *" or type == "const char *" :
                 continue
             output.write("    <type name='%s'>\n" % (type))
             ids = funcs[type]
             ids.sort()
             pid = ''        # not sure why we have dups, but get rid of them!
             for id in ids:
                 if id != pid:
                     output.write("      <ref name='%s'/>\n" % (id))
                     pid = id
             output.write("    </type>\n")

     def serialize_xrefs_constructors(self, output):
         funcs = {}
         for name in list(self.idx.functions.keys()):
             id = self.idx.functions[name]
             try:
                 (ret, params, desc) = id.info
                 if ret[0] == "void":
                     continue
                 if ret[0] in funcs:
                     funcs[ret[0]].append(name)
                 else:
                     funcs[ret[0]] = [name]
             except:
                 pass
         typ = list(funcs.keys())
         typ.sort()
         for type in typ:
             if type == '' or type == 'void' or type == "int" or \
                type == "char *" or type == "const char *" :
                 continue
             output.write("    <type name='%s'>\n" % (type))
             ids = funcs[type]
             ids.sort()
             for id in ids:
                 output.write("      <ref name='%s'/>\n" % (id))
             output.write("    </type>\n")

     def serialize_xrefs_alpha(self, output):
         letter = None
         ids = list(self.idx.identifiers.keys())
         ids.sort()
         for id in ids:
             if id[0] != letter:
                 if letter != None:
                     output.write("    </letter>\n")
                 letter = id[0]
                 output.write("    <letter name='%s'>\n" % (letter))
             output.write("      <ref name='%s'/>\n" % (id))
         if letter != None:
             output.write("    </letter>\n")

     def serialize_xrefs_references(self, output):
         typ = list(self.idx.identifiers.keys())
         typ.sort()
         for id in typ:
             idf = self.idx.identifiers[id]
             module = idf.header
             output.write("    <reference name='%s' href='%s'/>\n" % (id,
                          'html/' + self.basename + '-' +
                          self.modulename_file(module) + '.html#' +
                          id))

     def serialize_xrefs_index(self, output):
         index = self.xref
         typ = list(index.keys())
         typ.sort()
         letter = None
         count = 0
         chunk = 0
         chunks = []
         for id in typ:
             if len(index[id]) > 30:
                 continue
             if id[0] != letter:
                 if letter == None or count > 200:
                     if letter != None:
                         output.write("      </letter>\n")
                         output.write("    </chunk>\n")
                         count = 0
                         chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                     output.write("    <chunk name='chunk%s'>\n" % (chunk))
                     first_letter = id[0]
                     chunk = chunk + 1
                 elif letter != None:
                     output.write("      </letter>\n")
                 letter = id[0]
                 output.write("      <letter name='%s'>\n" % (letter))
             output.write("        <word name='%s'>\n" % (id))
             tokens = index[id];
             tokens.sort()
             tok = None
             for token in tokens:
                 if tok == token:
                     continue
                 tok = token
                 output.write("          <ref name='%s'/>\n" % (token))
                 count = count + 1
             output.write("        </word>\n")
         if letter != None:
             output.write("      </letter>\n")
             output.write("    </chunk>\n")
             if count != 0:
                 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
             output.write("    <chunks>\n")
             for ch in chunks:
                 output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                              ch[0], ch[1], ch[2]))
             output.write("    </chunks>\n")

     def serialize_xrefs(self, output):
         output.write("  <references>\n")
         self.serialize_xrefs_references(output)
         output.write("  </references>\n")
         output.write("  <alpha>\n")
         self.serialize_xrefs_alpha(output)
         output.write("  </alpha>\n")
         output.write("  <constructors>\n")
         self.serialize_xrefs_constructors(output)
         output.write("  </constructors>\n")
         output.write("  <functions>\n")
         self.serialize_xrefs_functions(output)
         output.write("  </functions>\n")
         output.write("  <files>\n")
         self.serialize_xrefs_files(output)
         output.write("  </files>\n")
         output.write("  <index>\n")
         self.serialize_xrefs_index(output)
         output.write("  </index>\n")

     def serialize(self):
         filename = "%s-api.xml" % self.name
         print("Saving XML description %s" % (filename))
         output = open(filename, "w")
         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
         output.write("<api name='%s'>\n" % self.name)
         output.write("  <files>\n")
         headers = list(self.headers.keys())
         headers.sort()
         for file in headers:
             self.serialize_exports(output, file)
         output.write("  </files>\n")
         output.write("  <symbols>\n")
         macros = list(self.idx.macros.keys())
         macros.sort()
         for macro in macros:
             self.serialize_macro(output, macro)
         enums = list(self.idx.enums.keys())
         enums.sort()
         for enum in enums:
             self.serialize_enum(output, enum)
         typedefs = list(self.idx.typedefs.keys())
         typedefs.sort()
         for typedef in typedefs:
             self.serialize_typedef(output, typedef)
         variables = list(self.idx.variables.keys())
         variables.sort()
         for variable in variables:
             self.serialize_variable(output, variable)
         functions = list(self.idx.functions.keys())
         functions.sort()
         for function in functions:
             self.serialize_function(output, function)
         output.write("  </symbols>\n")
         output.write("</api>\n")
         output.close()

         filename = "%s-refs.xml" % self.name
         print("Saving XML Cross References %s" % (filename))
         output = open(filename, "w")
         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
         output.write("<apirefs name='%s'>\n" % self.name)
         self.serialize_xrefs(output)
         output.write("</apirefs>\n")
         output.close()


 def rebuild():
     """Rebuild the API description of the project found around the
     current directory.

     The project is guessed from the first of parser.c, ../parser.c or
     ../libxslt/transform.c which exists. The files are scanned,
     analyzed and serialized; when ../libexslt/exslt.c exists the same
     is then done for libexslt. Returns the builder of the main project,
     or None when no project could be guessed.
     """
     # (file probed, message, project, directories, excludes)
     candidates = (
         ("parser.c", "Rebuilding API description for libxml2",
          "libxml2", [".", "."], ["xmlwin32version.h", "tst.c"]),
         ("../parser.c", "Rebuilding API description for libxml2",
          "libxml2", ["..", "../include/libxml"],
          ["xmlwin32version.h", "tst.c"]),
         ("../libxslt/transform.c", "Rebuilding API description for libxslt",
          "libxslt", ["../libxslt"],
          ["win32config.h", "libxslt.h", "tst.c"]),
     )
     builder = None
     for probe, message, project, directories, excludes in candidates:
         if glob.glob(probe):
             print(message)
             builder = docBuilder(project, directories, excludes)
             break
     else:
         print("rebuild() failed, unable to guess the module")
         return None
     builder.scan()
     builder.analyze()
     builder.serialize()
     if glob.glob("../libexslt/exslt.c"):
         extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
         extra.scan()
         extra.analyze()
         extra.serialize()
     return builder

 #
 # for debugging the parser
 #
 def parse(filename):
     """Run the C parser on a single file and return the index built."""
     return CParser(filename).parse()

 if __name__ == "__main__":
     # Without argument the whole API description is rebuilt, otherwise
     # the file given as first argument is parsed with debug turned on.
     if len(sys.argv) <= 1:
         rebuild()
     else:
         debug = 1
         parse(sys.argv[1])