| #!/usr/bin/python -u |
| # |
| # This is the API builder, it parses the C sources and build the |
| # API formal description in XML. |
| # |
| # See Copyright for the status of this software. |
| # |
| # daniel@veillard.com |
| # |
| import os, sys |
| import string |
| import glob |
| |
# Non-zero enables the "=>" trace lines printed while preprocessor
# directives and tokens are handed to the parser.
debug = 0

# Name of one symbol whose definitions/updates should be traced on stdout
# (for instance 'ignorableWhitespaceSAXFunc'); None disables the tracing.
debugsym = None
| |
| # |
| # C parser analysis code |
| # |
# Maps a file name to the reason why it is ignored.
ignored_files = {
    # trio sources
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",

    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",

    # out of memory tester
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",

    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",

    # test tools
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",

    # regression tests
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "tst.c": "not part of the library",
    "test.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
    "testrecurse.c": "test for entities recursions",

    # internal APIs
    "xzlib.h": "Internal API only 2.8.0",
    "buf.h": "Internal API only 2.9.0",
    "enc.h": "Internal API only 2.9.0",
    "/save.h": "Internal API only 2.9.0",
    "timsort.h": "Internal header only for xpath.c 2.9.0",
}
| |
# Maps a C identifier to a pair (n, reason).  When the parser meets the
# identifier it drops it together with the n tokens that follow it.
ignored_words = {
    # plain keywords and decoration macros: nothing else to skip
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),

    # function-like macros: the tokens of their argument list go too
    "X_IN_Y": (5, "macro function builder"),
    "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
}
| |
def escape(raw):
    """Return *raw* with the five XML special characters replaced by
    their predefined entity references, so that the result can be put
    in XML character data or in a quoted attribute value."""
    # '&' has to be handled first, otherwise the ampersands introduced
    # by the following replacements would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
| |
def uniq(items):
    """Return the distinct elements of *items* as a new list.

    The elements are used as dictionary keys, so they must be hashable."""
    return list(dict.fromkeys(items))
| |
class identifier:
    """Record kept for one symbol met while parsing the sources.

    It stores the symbol name, the header and module it was seen in, its
    kind (``type``), the line number, two free-form payloads (``info`` and
    ``extra``), a static flag and the list of preprocessor conditionals
    guarding it (None when there is none)."""

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    def set_conditionals(self, conditionals):
        """Store a private copy of *conditionals*; None and the empty
        list are both recorded as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name

    def get_header(self):
        # This used to return self.module, so the header recorded for the
        # symbol could never be read back through the accessor.
        return self.header

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge newly found information into this record.

        The header and the type are only filled in when still unknown.
        The module is filled in when unknown or when it still equals the
        header.  info, extra and conditionals are overwritten whenever a
        non-None value is given."""
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # This used to store `module` in the header field.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
| |
class index:
    """Symbol table filled while parsing.

    ``identifiers`` maps every known name to its record; the other
    dictionaries (functions, variables, includes, structs, enums,
    typedefs, macros, references) hold the same records sorted by kind."""

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _new_identifier(self, name, header, module, static, type, lineno,
                        info, extra, conditionals):
        """Create the record for *name* and store it in ``identifiers``.

        NOTE(review): the previous code first tried identifier.update()
        on an existing record, but passed it one argument too many
        (lineno); the resulting TypeError was swallowed by a bare
        ``except`` and a fresh record was created anyway.  A new record
        has therefore always replaced the old one; that behaviour is kept
        as is, because an in-place update would share one record between
        several of the per-kind tables."""
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to *name*; names starting with '__' are
        ignored and None is returned for them."""
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)
        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record the definition of *name* and file it in the table
        matching *type*; names starting with '__' are ignored and None
        is returned for them."""
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_entry(self, kind, table, id, entry):
        """Copy *entry* into *table* and ``identifiers`` unless *id* is
        already in *table*, in which case the clash is reported."""
        if id in table:
            print("%s %s from %s redeclared in %s" % (
                  kind, id, table[id].header, entry.header))
        else:
            table[id] = entry
            self.identifiers[id] = entry

    def merge(self, idx):
        """Import the functions, variables, structs, typedefs, macros and
        enums of *idx*, reporting the symbols already known here."""
        #
        # macro might be used to override functions or variables
        # definitions: the function or variable wins
        #
        for id, entry in idx.functions.items():
            self.macros.pop(id, None)
            self._merge_entry("function", self.functions, id, entry)
        for id, entry in idx.variables.items():
            self.macros.pop(id, None)
            self._merge_entry("variable", self.variables, id, entry)
        for id, entry in idx.structs.items():
            self._merge_entry("struct", self.structs, id, entry)
        for id, entry in idx.typedefs.items():
            self._merge_entry("typedef", self.typedefs, id, entry)
        for id, entry in idx.macros.items():
            # a macro hiding a known variable, function or enum is dropped
            if id in self.variables or id in self.functions or \
               id in self.enums:
                continue
            self._merge_entry("macro", self.macros, id, entry)
        for id, entry in idx.enums.items():
            self._merge_entry("enum", self.enums, id, entry)

    def merge_public(self, idx):
        """Complete the functions known here with what *idx* knows about
        them; functions only present in *idx* are left out."""
        for id, up in idx.functions.items():
            if id not in self.functions:
                continue
            mine = self.functions[id]
            # check that function condition agrees with header
            if up.conditionals != mine.conditionals:
                print("Header condition differs from Function for %s:" \
                      % id)
                print("  H: %s" % mine.conditionals)
                print("  C: %s" % up.conditionals)
            mine.update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of *dict* there are and how many of
        them are public (static == 0); *type* is the label to print."""
        count = len(dict)
        public = sum(1 for entry in dict.values() if entry.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
| |
| class CLexer: |
| """A lexer for the C language, tokenize the input by reading and |
| analyzing it line by line""" |
| def __init__(self, input): |
| self.input = input |
| self.tokens = [] |
| self.line = "" |
| self.lineno = 0 |
| |
| def getline(self): |
| line = '' |
| while line == '': |
| line = self.input.readline() |
| if not line: |
| return None |
| self.lineno = self.lineno + 1 |
| line = line.lstrip() |
| line = line.rstrip() |
| if line == '': |
| continue |
| while line[-1] == '\\': |
| line = line[:-1] |
| n = self.input.readline() |
| self.lineno = self.lineno + 1 |
| n = n.lstrip() |
| n = n.rstrip() |
| if not n: |
| break |
| else: |
| line = line + n |
| return line |
| |
| def getlineno(self): |
| return self.lineno |
| |
| def push(self, token): |
| self.tokens.insert(0, token); |
| |
| def debug(self): |
| print("Last token: ", self.last) |
| print("Token queue: ", self.tokens) |
| print("Line %d end: " % (self.lineno), self.line) |
| |
| def token(self): |
| while self.tokens == []: |
| if self.line == "": |
| line = self.getline() |
| else: |
| line = self.line |
| self.line = "" |
| if line == None: |
| return None |
| |
| if line[0] == '#': |
| self.tokens = list(map((lambda x: ('preproc', x)), |
| line.split())) |
| break; |
| l = len(line) |
| if line[0] == '"' or line[0] == "'": |
| end = line[0] |
| line = line[1:] |
| found = 0 |
| tok = "" |
| while found == 0: |
| i = 0 |
| l = len(line) |
| while i < l: |
| if line[i] == end: |
| self.line = line[i+1:] |
| line = line[:i] |
| l = i |
| found = 1 |
| break |
| if line[i] == '\\': |
| i = i + 1 |
| i = i + 1 |
| tok = tok + line |
| if found == 0: |
| line = self.getline() |
| if line == None: |
| return None |
| self.last = ('string', tok) |
| return self.last |
| |
| if l >= 2 and line[0] == '/' and line[1] == '*': |
| line = line[2:] |
| found = 0 |
| tok = "" |
| while found == 0: |
| i = 0 |
| l = len(line) |
| while i < l: |
| if line[i] == '*' and i+1 < l and line[i+1] == '/': |
| self.line = line[i+2:] |
| line = line[:i-1] |
| l = i |
| found = 1 |
| break |
| i = i + 1 |
| if tok != "": |
| tok = tok + "\n" |
| tok = tok + line |
| if found == 0: |
| line = self.getline() |
| if line == None: |
| return None |
| self.last = ('comment', tok) |
| return self.last |
| if l >= 2 and line[0] == '/' and line[1] == '/': |
| line = line[2:] |
| self.last = ('comment', line) |
| return self.last |
| i = 0 |
| while i < l: |
| if line[i] == '/' and i+1 < l and line[i+1] == '/': |
| self.line = line[i:] |
| line = line[:i] |
| break |
| if line[i] == '/' and i+1 < l and line[i+1] == '*': |
| self.line = line[i:] |
| line = line[:i] |
| break |
| if line[i] == '"' or line[i] == "'": |
| self.line = line[i:] |
| line = line[:i] |
| break |
| i = i + 1 |
| l = len(line) |
| i = 0 |
| while i < l: |
| if line[i] == ' ' or line[i] == '\t': |
| i = i + 1 |
| continue |
| o = ord(line[i]) |
| if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ |
| (o >= 48 and o <= 57): |
| s = i |
| while i < l: |
| o = ord(line[i]) |
| if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ |
| (o >= 48 and o <= 57) or \ |
| (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: |
| i = i + 1 |
| else: |
| break |
| self.tokens.append(('name', line[s:i])) |
| continue |
| if "(){}:;,[]".find(line[i]) != -1: |
| # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ |
| # line[i] == '}' or line[i] == ':' or line[i] == ';' or \ |
| # line[i] == ',' or line[i] == '[' or line[i] == ']': |
| self.tokens.append(('sep', line[i])) |
| i = i + 1 |
| continue |
| if "+-*><=/%&!|.".find(line[i]) != -1: |
| # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ |
| # line[i] == '>' or line[i] == '<' or line[i] == '=' or \ |
| # line[i] == '/' or line[i] == '%' or line[i] == '&' or \ |
| # line[i] == '!' or line[i] == '|' or line[i] == '.': |
| if line[i] == '.' and i + 2 < l and \ |
| line[i+1] == '.' and line[i+2] == '.': |
| self.tokens.append(('name', '...')) |
| i = i + 3 |
| continue |
| |
| j = i + 1 |
| if j < l and ( |
| "+-*><=/%&!|".find(line[j]) != -1): |
| # line[j] == '+' or line[j] == '-' or line[j] == '*' or \ |
| # line[j] == '>' or line[j] == '<' or line[j] == '=' or \ |
| # line[j] == '/' or line[j] == '%' or line[j] == '&' or \ |
| # line[j] == '!' or line[j] == '|'): |
| self.tokens.append(('op', line[i:j+1])) |
| i = j + 1 |
| else: |
| self.tokens.append(('op', line[i])) |
| i = i + 1 |
| continue |
| s = i |
| while i < l: |
| o = ord(line[i]) |
| if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ |
| (o >= 48 and o <= 57) or ( |
| " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): |
| # line[i] != ' ' and line[i] != '\t' and |
| # line[i] != '(' and line[i] != ')' and |
| # line[i] != '{' and line[i] != '}' and |
| # line[i] != ':' and line[i] != ';' and |
| # line[i] != ',' and line[i] != '+' and |
| # line[i] != '-' and line[i] != '*' and |
| # line[i] != '/' and line[i] != '%' and |
| # line[i] != '&' and line[i] != '!' and |
| # line[i] != '|' and line[i] != '[' and |
| # line[i] != ']' and line[i] != '=' and |
| # line[i] != '*' and line[i] != '>' and |
| # line[i] != '<'): |
| i = i + 1 |
| else: |
| break |
| self.tokens.append(('name', line[s:i])) |
| |
| tok = self.tokens[0] |
| self.tokens = self.tokens[1:] |
| self.last = tok |
| return tok |
| |
| class CParser: |
| """The C module parser""" |
def __init__(self, filename, idx = None):
    """Open *filename* and get ready to parse it.

    The file is flagged as a header when its name ends with '.h'.  The
    symbols found are stored in *idx*, or in a new index when *idx* is
    None."""
    self.filename = filename
    self.is_header = 1 if len(filename) > 2 and filename[-2:] == '.h' else 0
    # the lexer reads the file lazily, so it is left open here
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    self.index = index() if idx == None else idx
    # comment bookkeeping
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
    # parsing flags
    self.collect_ref = 0
    self.no_error = 0
    # stacks describing the enclosing preprocessor conditionals
    self.conditionals = []
    self.defines = []
| |
def collect_references(self):
    """Ask parseBlock() to record the symbols referenced in the code
    blocks."""
    self.collect_ref = 1
| |
def stop_error(self):
    """Mute warning() and error() until start_error() is called."""
    self.no_error = 1
| |
def start_error(self):
    """Let warning() and error() report again."""
    self.no_error = 0
| |
def lineno(self):
    """Return the line number reported by the lexer."""
    current = self.lexer.getlineno()
    return current
| |
def index_add(self, name, module, static, type, info=None, extra = None):
    """Register the definition of *name* in the index, stamped with the
    current line number and conditionals.  The module is also given as
    header when the parsed file is a header."""
    header = module if self.is_header == 1 else None
    self.index.add(name, header, module, static, type, self.lineno(),
                   info, extra, self.conditionals)
| |
def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    """Register a reference to *name* in the index, stamped with the
    current line number and conditionals.  The module is also given as
    header when the parsed file is a header."""
    header = module if self.is_header == 1 else None
    self.index.add_ref(name, header, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
| |
def warning(self, msg):
    """Print *msg* unless the reports are muted."""
    if not self.no_error:
        print(msg)
| |
def error(self, msg, token=-1):
    """Report a parse error and exit with status 1; nothing is done when
    the reports are muted.  *token*, when given, is printed as well."""
    if self.no_error:
        return

    print("Parse Error: " + msg)
    if token != -1:
        print("Got token ", token)
    # dump the lexer state before giving up
    self.lexer.debug()
    sys.exit(1)
| |
def debug(self, msg, token=-1):
    """Print *msg*, the optional *token* and the state of the lexer."""
    print("Debug: " + msg)
    if not token == -1:
        print("Got token ", token)
    self.lexer.debug()
| |
| def parseTopComment(self, comment): |
| res = {} |
| lines = comment.split("\n") |
| item = None |
| for line in lines: |
| while line != "" and (line[0] == ' ' or line[0] == '\t'): |
| line = line[1:] |
| while line != "" and line[0] == '*': |
| line = line[1:] |
| while line != "" and (line[0] == ' ' or line[0] == '\t'): |
| line = line[1:] |
| try: |
| (it, line) = line.split(":", 1) |
| item = it |
| while line != "" and (line[0] == ' ' or line[0] == '\t'): |
| line = line[1:] |
| if item in res: |
| res[item] = res[item] + " " + line |
| else: |
| res[item] = line |
| except: |
| if item != None: |
| if item in res: |
| res[item] = res[item] + " " + line |
| else: |
| res[item] = line |
| self.index.info = res |
| |
def parseComment(self, token):
    """Record the comment carried by *token* and return the next token of
    the lexer.

    The very first non empty comment is also kept as top comment.  A
    comment starting with '*' replaces the current comment, any other
    one is appended to it.  The DOC_DISABLE and DOC_ENABLE markers found
    in the current comment mute and restore the reports."""
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # startswith() instead of text[0]: the text is empty for a bare '//'
    # and indexing it raised IndexError
    if self.comment is None or text.startswith('*'):
        self.comment = text
    else:
        self.comment = self.comment + text
    token = self.lexer.token()

    if self.comment.find("DOC_DISABLE") != -1:
        self.stop_error()

    if self.comment.find("DOC_ENABLE") != -1:
        self.start_error()

    return token
| |
| # |
| # Parse a comment block associate to a typedef |
| # |
def parseTypeComment(self, name, quiet = 0):
    """Extract the description of type *name* from the current comment.

    The comment must start with '*' and its first line (an optional lone
    '*' line apart) must be '* name:'; the remaining lines form the
    description.  The description is returned as a string, which is
    empty when the comment is missing or does not have that layout.
    The problems are reported through warning() unless *quiet* is set
    or the name starts with '__'.

    The early exits used to return an (args, desc) tuple while the
    normal exit returned a string; a string is now always returned."""
    if name[0:2] == '__':
        quiet = 1

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for type %s" % (name))
        return ""
    # startswith(): the comment may be an empty string
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in type comment for %s" % (name))
        return ""
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if not lines or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted type comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, got))
        return ""
    del lines[0]
    while lines and lines[0] == '*':
        del lines[0]

    # every remaining line loses its '*' decoration and its blanks
    desc = " ".join(l.lstrip('*').strip() for l in lines).strip()

    if quiet == 0:
        if desc == "":
            self.warning("Type comment for %s lack description of the type" % (name))

    return desc
| # |
| # Parse a comment block associate to a macro |
| # |
def parseMacroComment(self, name, quiet = 0):
    """Extract the documentation of macro *name* from the current comment.

    The comment must start with '*' and its first line (an optional lone
    '*' line apart) must be '* name:'.  It is followed by the
    '* @arg: text' entries, which may be continued on the next lines,
    then by the description.

    Returns the pair (args, desc) where args is the list of the
    (arg, text) pairs and desc the description; both are empty when the
    comment is missing or does not have that layout.  The problems are
    reported through warning() unless *quiet* is set or the name starts
    with '__'.

    The list of lines is checked before each access: a comment ending
    right after the name line or after an @arg entry used to raise
    IndexError."""
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for macro %s" % (name))
        return((args, desc))
    # startswith(): the comment may be an empty string
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if not lines or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted macro comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    while lines and lines[0] == '*':
        del lines[0]

    # the '* @arg: text' entries
    while lines and lines[0][0:3] == '* @':
        l = lines[0][3:]
        if ':' not in l:
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning("  problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        (arg, desc) = l.split(':', 1)
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # the lines longer than 2 characters which do not start a new
        # entry continue the text of the current one
        l = lines[0].strip() if lines else ""
        while len(l) > 2 and l[0:3] != '* @':
            desc = desc + ' ' + l.lstrip('*').strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        args.append((arg, desc))

    # the description itself
    while lines and lines[0] == '*':
        del lines[0]
    desc = " ".join(l.lstrip('*').strip() for l in lines).strip()

    if quiet == 0:
        if desc == "":
            self.warning("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
| |
| # |
| # Parse a comment block and merge the information found in the |
| # parameters descriptions, finally returns a block as complete |
| # as possible |
| # |
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the current comment into the parsed *description* of
    function *name*.

    *description* is the pair (ret, args): ret[0] is the return type and
    args the list of the (type, name, text) triples of the arguments.
    The comment must start with '*' and its first line (an optional lone
    '*' line apart) must be '* name:'.  It is followed by the
    '* @arg: text' entries, the description, and the description of the
    return value which starts at the first line beginning with 'return'
    or 'Return'.

    Returns ((return type, return text), args, description).  The text
    of the documented arguments is stored in *args*, which is modified
    in place.  The problems are reported through warning() unless
    *quiet* is set, the function is main or its name starts with '__'.

    The list of lines is checked before each access: a comment ending
    right after the name line or after an @arg entry used to raise
    IndexError."""
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    # startswith(): the comment may be an empty string
    if not self.comment.startswith('*'):
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if not lines or lines[0] != "* %s:" % (name):
        if not quiet:
            got = lines[0] if lines else ""
            self.warning("Misformatted function comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    while lines and lines[0] == '*':
        del lines[0]

    # the '* @arg: text' entries
    nbargs = len(args)
    while lines and lines[0][0:3] == '* @':
        l = lines[0][3:]
        if ':' not in l:
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning("  problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        (arg, desc) = l.split(':', 1)
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # the lines longer than 2 characters which do not start a new
        # entry continue the text of the current one
        l = lines[0].strip() if lines else ""
        while len(l) > 2 and l[0:3] != '* @':
            desc = desc + ' ' + l.lstrip('*').strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        # attach the text to the first argument of that name
        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))

    # the description, then the description of the return value
    while lines and lines[0] == '*':
        del lines[0]
    desc = ""
    while lines:
        l = lines.pop(0).lstrip('*').strip()
        if l.startswith(("return", "Return")):
            # drop the first word, all that follows is about the return
            # value
            words = l.split(' ', 1)
            retdesc = words[1].strip() if len(words) > 1 else ""
            while lines:
                retdesc = retdesc + " " + lines.pop(0).lstrip('*').strip()
        else:
            desc = desc + " " + l

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for param in args:
            if param[2] is None and param[0] != "void" and \
               param[1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, param[1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
| |
def parsePreproc(self, token):
    """Handle the preprocessor directive starting with *token* and
    return the first token following it.

    The '#include' and '#define' directives are recorded in the index;
    the '#ifdef', '#ifndef', '#if', '#else' and '#endif' ones update the
    stacks of conditionals.  The tokens of the other directives are
    skipped."""
    if debug:
        print("=> preproc ", token, self.lexer.tokens)
    name = token[1]
    if name == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            self.index_add(token[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()
        return token
    if name == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            name = token[1]
            # skip the value of the macro
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            name = name.split('(')[0]
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                           "macro", info)
            return token

    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    # We process conditionals (i.e. tokens from #ifdef, #ifndef,
    # #if, #else and #endif) for headers and mainline code,
    # store the ones from the header in libxml2-api.xml, and later
    # (in the routine merge_public) verify that the two (header and
    # mainline code) agree.
    #
    # There is a small problem with processing the headers. Some of
    # the variables are not concerned with enabling / disabling of
    # library functions (e.g. '__XML_PARSER_H__'), and we don't want
    # them to be included in libxml2-api.xml, or involved in
    # the check between the header and the mainline code.  To
    # accomplish this, we ignore any conditional which doesn't include
    # the string 'ENABLED'
    #
    if name == "#ifdef" or name == "#ifndef":
        # A directive without operand used to raise IndexError here; an
        # empty entry is pushed instead so that the matching #endif
        # still finds something to pop.
        pending = self.lexer.tokens
        apstr = pending[0][1] if pending else ""
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            if name == "#ifdef":
                self.conditionals.append("defined(%s)" % apstr)
            else:
                self.conditionals.append("!defined(%s)" % apstr)
    elif name == "#if":
        apstr = " ".join([tok[1] for tok in self.lexer.tokens])
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            self.conditionals.append(apstr)
    elif name == "#else":
        if self.conditionals != [] and self.defines != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        if self.conditionals != [] and self.defines != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]
    # skip what is left of the directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
| |
| # |
| # token acquisition on top of the lexer, it handle internally |
| # preprocessor and comments since they are logically not part of |
| # the program structure. |
| # |
def token(self):
    """Return the next token the parser has to deal with, or None at end
    of input.

    The comments and the preprocessor directives are handled on the fly,
    '__const' is given back as 'const', an '__attribute' is skipped up to
    the next ';' which is returned, and the words listed in
    ignored_words are dropped with the number of tokens registered for
    them."""
    token = self.lexer.token()
    while token != None:
        kind = token[0]
        if kind == 'comment':
            token = self.parseComment(token)
            continue
        if kind == 'preproc':
            token = self.parsePreproc(token)
            continue
        if kind == "name":
            word = token[1]
            if word == "__const":
                return ("name", "const")
            if word == "__attribute":
                token = self.lexer.token()
                while token != None and token[1] != ";":
                    token = self.lexer.token()
                return token
            if word in ignored_words:
                (n, info) = ignored_words[word]
                # drop the n tokens attached to the word, then go on
                # with the one coming next
                for _ in range(n):
                    token = self.lexer.token()
                token = self.lexer.token()
                continue
        if debug:
            print("=> ", token)
        return token
    return None
| |
| # |
| # Parse a typedef, it records the type and its name. |
| # |
def parseTypedef(self, token):
    """Parse a typedef, *token* being the first token of the aliased
    type, and record every name it declares.

    A name is indexed as "functype" when parseType() left a signature,
    as "struct" when the base type is exactly "struct" and as "typedef"
    otherwise.  Returns the token following the closing ';', or the
    offending token after a parse error."""
    if token == None:
        return None
    token = self.parseType(token)
    if token == None:
        self.error("parsing typedef")
        return None
    base_type = self.type
    type = base_type
    static = not self.is_header
    while token != None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature != None:
            # function type: only keep what precedes the '('
            type = type.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((type, None), signature), 1)
            self.index_add(name, self.filename, static, "functype", d)
        elif base_type == "struct":
            self.index_add(name, self.filename, static, "struct", type)
            base_type = "struct " + name
        else:
            # TODO report missing or misformatted comments
            info = self.parseTypeComment(name, 1)
            self.index_add(name, self.filename, static, "typedef",
                           type, info)
        token = self.token()

        if token == None or token[0] not in ('sep', 'name'):
            self.error("parsing typedef: expecting ';'", token)
            return token
        if token[0] == 'name':
            type = base_type
            continue
        if token[1] == ';':
            break
        if token[1] != ',':
            self.error("parsing typedef: expecting ';'", token)
            return token
        # one more declarator: restart from the base type and add the
        # operators found in front of the name
        type = base_type
        token = self.token()
        while token != None and token[0] == "op":
            type = type + token[1]
            token = self.token()
    token = self.token()
    return token
| |
| # |
| # Parse a C code block, used for functions it parse till |
| # the balancing } included |
| # |
def parseBlock(self, token):
    """Skip a C code block up to its balancing '}' included and return
    the token which follows it, or None when the input ends first.

    When the collection of references is enabled, the names starting
    with 'xml' are recorded as function references when followed by '('
    and as type references when followed by a name then one of ';', ','
    or '='; the names starting with 'XML_' or 'LIBXML_' are recorded as
    typedef references.

    The look-ahead tokens are checked against None: the input ending in
    the middle of a block used to raise TypeError."""
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            word = oldtok[1]
            if word[0:3] == "xml":
                if token is None:
                    continue
                if token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(word, self.filename, 0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(word, self.filename, 0, "type")
            elif word[0:4] == "XML_" or word[0:7] == "LIBXML_":
                self.index_add_ref(word, self.filename, 0, "typedef")
        else:
            token = self.token()
    return token
| |
| # |
| # Parse a C struct definition till the balancing } |
| # |
def parseStruct(self, token):
    """Parse the body of a C struct up to and including its balancing '}'.

    `token` is the first token after the opening '{'.  Each field is read
    with self.parseType(); a (type, name, comment) tuple is collected for
    every `type name;` field, where comment is whatever self.comment
    holds after the token following the ';' has been fetched.  Nested
    brace blocks are skipped with self.parseTypeBlock().  self.type is
    restored to its value on entry after every field.

    The collected list is stored in self.struct_fields.  Returns the token
    following the closing '}', or None at end of input.  A missing token
    after a field name is reported through self.error() instead of being
    dereferenced.
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                if token is not None and token[0] == "sep" and \
                        token[1] == ";":
                    # Reset, then fetch the next token: self.comment is
                    # read right after, so the order matters here.
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                # Nested anonymous block: skip it and its optional name.
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and \
                        token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token
| |
| # |
| # Parse a C enum block, parse till the balancing } |
| # |
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to and including its balancing '}'.

    `token` is the first token after the opening '{'.  The result is
    stored in self.enums as a list of (name, value, comment) tuples, where
    value is a string: the text following '=' when an initializer is
    present, otherwise the previous value plus one.  An enumerator that
    follows a non-numeric initializer gets an empty value and a warning.

    The running value starts at "-1" so that a first enumerator without
    an initializer is numbered 0, as C defines it.

    Returns the token following the closing '}', or None when the input
    ends first (lookahead tokens are checked for None).
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            # Flush the last pending enumerator.
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # A new name completes the previous enumerator.
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
                    token[1][0] == "=":
                # Explicit initializer: concatenate tokens up to ',' or '}'.
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (
                        token[0] != "sep" or
                        (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
| |
| # |
| # Parse a C definition block; used for structs, it parses up to |
| # and including the balancing } |
| # |
def parseTypeBlock(self, token):
    """Skip tokens up to and including the balancing '}'.

    `token` is the first token inside the block.  Nested '{' ... '}'
    pairs are skipped recursively.  Returns the token following the
    closing '}', or None when the input ends first.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            inner = self.token()
            token = self.parseTypeBlock(inner)
        elif is_sep and token[1] == "}":
            return self.token()
        else:
            token = self.token()
    return token
| |
| # |
| # Parse a type: the type name can occur either after the definition |
| # or within it, which makes this a little harder. If it occurs |
| # inside, the name token is pushed back before returning. |
| # |
def parseType(self, token):
    """Parse a C type starting at `token`.

    The textual type is accumulated in self.type.  self.struct_fields and
    self.signature are reset on entry; self.signature is set again when
    the type turns out to be a function pointer (via parseSignature).

    Handled forms, in order:
      * leading `const` / `unsigned` / `signed` words,
      * `long` / `short`, optionally followed by `long`, `int` or
        `double` (so "long int", "unsigned long long", ... are kept
        together as one type),
      * `struct`, with an optional tag, body or pointer stars,
      * `enum` with a body; each enumerator is registered through
        self.index_add(),
      * any other single name,
    followed by trailing operator tokens or `const`, then a function
    pointer declarator or an array / bit-field suffix.

    Returns the token naming the declared entity when one was seen (the
    token following it is pushed back on self.lexer), otherwise the token
    at which parsing stopped.  May return None at end of input.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # Append one word to self.type, separated by a single space.
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
            token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()
    if token is None:
        return token

    if token[0] == "name" and token[1] in ("long", "short"):
        add_word(token[1])
        # Multi-word integer/floating types: absorb the words that belong
        # to the type so they are not mistaken for the declared name.
        token = self.token()
        while token is not None and token[0] == "name" and \
                token[1] in ("long", "int", "double"):
            add_word(token[1])
            token = self.token()

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            # Hand the name back to the caller; keep the lookahead queued.
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # Anonymous enum: return a placeholder name token.
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
        token = self.token()
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    while token is not None and (token[0] == "op" or
                                 token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() parses parameter types, which
                # overwrites self.type: save and restore it.
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                        token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
| |
| # |
| # Parse a signature: '(' has been parsed and we scan the type definition |
| # up to the ')' included |
def parseSignature(self, token):
    """Parse a parameter list; the opening '(' has already been consumed.

    Each parameter type is read with self.parseType().  The result is
    stored in self.signature as a list of (type, name, None) tuples; a
    final parameter given as a bare type is stored with name None, or
    "..." when the type itself is "...".

    Returns the token following the closing ')', or None at end of input.
    """
    def is_sep(tok, text):
        # True when tok is the separator token `text`.
        return tok is not None and tok[0] == "sep" and tok[1] == text

    if is_sep(token, ')'):
        self.signature = []
        return self.token()

    params = []
    while token is not None:
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif is_sep(token, ','):
            token = self.token()
            continue
        elif is_sep(token, ')'):
            # only the type was provided
            if self.type == "...":
                params.append((self.type, "...", None))
            else:
                params.append((self.type, None, None))

        if is_sep(token, ','):
            token = self.token()
        elif is_sep(token, ')'):
            token = self.token()
            break
    self.signature = params
    return token
| |
| # |
| # Parse a global definition, be it a type, variable or function |
| # the extern "C" blocks are a bit nasty and require it to recurse. |
| # |
def parseGlobal(self, token):
    """Parse one global definition: a typedef, variable(s) or a function.

    `token` must not be None.  An `extern "C" {` block is handled by
    recursing over its content; `static` is remembered and passed to
    self.index_add() for functions and comma-separated variables.
    Typedefs are delegated to self.parseTypedef(); everything else is
    parsed with self.parseType() and registered as "struct", "variable"
    or "function" through self.index_add().  Function bodies and brace
    initializers are skipped with self.parseBlock().

    Returns the token at which parsing stopped (normally the first token
    of the next definition), or None at end of input.  Input ending
    right after '=' is reported through self.error() rather than raising
    TypeError.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    cur_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # Array declarator: fold everything up to ';' into the type.
            cur_type = cur_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                                         token[1] != ";"):
                cur_type = cur_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (
                        token[0] != "sep" or
                        (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if cur_type == "struct":
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "variable", cur_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Prototype only.
                    d = self.mergeFunctionComment(
                        self.name, ((cur_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # Definition: register it, then skip the body.
                    d = self.mergeFunctionComment(
                        self.name, ((cur_type, None), self.signature),
                        static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared at once.
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", cur_type)
                cur_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    cur_type = cur_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
| |
def parse(self):
    """Parse the whole file, one global definition at a time.

    Announces the file through self.warning(), feeds every top-level
    token to self.parseGlobal(), then processes self.top_comment with
    self.parseTopComment() and returns self.index.

    NOTE(review): when a top-level token is not a name and self.error()
    returns, parsing stops after one more parseGlobal() call and None is
    returned without processing the top comment - confirm callers expect
    that.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
| |
| |
class docBuilder:
    """A documentation builder.

    Scans a set of directories for C sources and headers, parses them
    with CParser, merges the results into one index and serializes it as
    two XML files: "<name>-api.xml" (the API description) and
    "<name>-refs.xml" (the cross references).
    """

    # Punctuation that indexString() turns into word separators.
    _INDEX_SEPARATORS = "'\"/*[]()<>&#,.;"

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories -- list of directories to scan (default ['.'])
        excludes    -- list of path fragments to skip, in addition to the
                       keys of the module-level ignored_files table
        """
        # None sentinels avoid sharing one default list between instances.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = excludes + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Record in self.xref every significant word of `str` under `id`.

        Punctuation is treated as whitespace.  Words that do not start
        with an ASCII letter, are shorter than three characters, or are
        "and" / "the" (case-insensitive) are ignored.  Nothing is done
        when `str` is None.
        """
        if str is None:
            return
        for ch in self._INDEX_SEPARATORS:
            str = str.replace(ch, ' ')
        for token in str.split():
            # split() never yields empty strings, so token[0] is safe.
            if token[0] not in string.ascii_letters or len(token) < 3:
                continue
            # TODO: generalize this a bit
            if token.lower() in ('and', 'the'):
                continue
            self.xref.setdefault(token, []).append(id)

    def analyze(self):
        """Print a summary line and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge it into self.idx."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public part."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Return True when `path` contains one of the exclusion fragments."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return True
        return False

    def scan(self):
        """Collect the *.c and *.h files of every directory, then parse them.

        Files matching an exclusion are reported and skipped; the others
        are registered in self.modules (*.c) or self.headers (*.h).
        """
        for directory in self.directories:
            for pattern, table in (("/*.c", self.modules),
                                   ("/*.h", self.headers)):
                for file in glob.glob(directory + pattern):
                    if self._is_excluded(file):
                        print("Skipping %s" % file)
                    else:
                        table[file] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` without a trailing .h or .c."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing enumerator `name`."""
        id = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(id.header)))
        if id.info is not None:
            info = id.info
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() runs text taken from the parsed C
                # sources as Python; only use this on trusted input.
                try:
                    val = eval(info[0])
                except Exception:
                    # Not a Python expression: keep the raw text.
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing macro `name`."""
        id = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(id.header)))
        if id.info is not None:
            try:
                (args, desc) = id.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            arg_name, escape(arg_desc)))
                        # NOTE(review): words are indexed under the
                        # argument name, not the macro name - confirm
                        # this is intended.
                        self.indexString(arg_name, arg_desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: info of an unexpected shape is skipped.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for typedef `name`."""
        id = self.idx.typedefs[name]
        if id.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(id.header), id.info))
            name = id.info[7:]
            if name in self.idx.structs and \
                    isinstance(self.idx.structs[name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(id.header), id.info))
            try:
                desc = id.extra
                if desc is not None and desc != "":
                    output.write(">\n <info>%s</info>\n" % (escape(desc)))
                    output.write(" </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element describing variable `name`."""
        id = self.idx.variables[name]
        if id.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(id.header), id.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        """Write the element describing function (or functype) `name`."""
        id = self.idx.functions[name]
        if name == debugsym:
            print("=>", id)

        output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
            id.type, name, self.modulename_file(id.header),
            self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals is not None:
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " && "
                apstr = apstr + cond
            output.write(" <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = id.info
            if (desc is None or desc == '') and \
                    name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (
                    id.type, name, self.modulename_file(id.module)))

            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info: " % name, repr(id.info))
        output.write(" </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write(" <%s>%s</%s>\n" % (
                        data.lower(),
                        escape(hdr.info[data]),
                        data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                desc = hdr.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write(" <deprecated/>\n")

        for id in uniq(sorted(hdr.macros.keys())):
            # Macros are sometime used to masquerade other types.
            if id in hdr.functions:
                continue
            if id in hdr.variables:
                continue
            if id in hdr.typedefs:
                continue
            if id in hdr.structs:
                continue
            if id in hdr.enums:
                continue
            output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
        for kind, table in (("enum", hdr.enums),
                            ("typedef", hdr.typedefs),
                            ("struct", hdr.structs),
                            ("variable", hdr.variables),
                            ("function", hdr.functions)):
            for id in uniq(sorted(table.keys())):
                output.write(" <exports symbol='%s' type='%s'/>\n" % (
                    id, kind))
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for each header, the sorted list of symbols it declares."""
        for file in sorted(self.headers.keys()):
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            hdr = self.headers[file]
            ids = uniq(list(hdr.functions.keys()) + list(hdr.variables.keys()) +
                       list(hdr.macros.keys()) + list(hdr.typedefs.keys()) +
                       list(hdr.structs.keys()) + list(hdr.enums.keys()))
            ids.sort()
            for id in ids:
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </file>\n")

    def serialize_xrefs_functions(self, output):
        """Write, for each parameter type, the functions accepting it."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    funcs.setdefault(param[0], []).append(name)
            except Exception:
                # Best effort: functions without usable info are skipped.
                pass
        for typename in sorted(funcs.keys()):
            if typename in ('', 'void', "int", "char *", "const char *"):
                continue
            output.write(" <type name='%s'>\n" % (typename))
            pid = ''  # not sure why we have dups, but get rid of them!
            for id in sorted(funcs[typename]):
                if id != pid:
                    output.write(" <ref name='%s'/>\n" % (id))
                    pid = id
            output.write(" </type>\n")

    def serialize_xrefs_constructors(self, output):
        """Write, for each return type, the functions returning it."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                funcs.setdefault(ret[0], []).append(name)
            except Exception:
                # Best effort: functions without usable info are skipped.
                pass
        for typename in sorted(funcs.keys()):
            if typename in ('', 'void', "int", "char *", "const char *"):
                continue
            output.write(" <type name='%s'>\n" % (typename))
            for id in sorted(funcs[typename]):
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </type>\n")

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first character."""
        letter = None
        for id in sorted(self.idx.identifiers.keys()):
            if id[0] != letter:
                if letter is not None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (id))
        if letter is not None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the HTML page anchor documenting it."""
        for id in sorted(self.idx.identifiers.keys()):
            idf = self.idx.identifiers[id]
            module = idf.header
            output.write(" <reference name='%s' href='%s'/>\n" % (
                id,
                'html/' + self.basename + '-' +
                self.modulename_file(module) + '.html#' + id))

    def serialize_xrefs_index(self, output):
        """Write the word index built by indexString(), split into chunks.

        Words referenced by more than 30 entries are skipped.  A new chunk
        is started at a letter boundary once the current chunk holds more
        than 200 references.
        """
        index = self.xref
        typ = sorted(index.keys())
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in typ:
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        # Close the chunk that just filled up.
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter is not None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (id))
            tokens = index[id]
            tokens.sort()
            tok = None
            for token in tokens:
                if tok == token:
                    continue
                tok = token
                output.write(" <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write(" </word>\n")
        if letter is not None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write(" <chunks>\n")
            for ch in chunks:
                output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                    ch[0], ch[1], ch[2]))
            output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        """Write every cross-reference section, each in its own element."""
        sections = (
            (" <references>\n", self.serialize_xrefs_references,
             " </references>\n"),
            (" <alpha>\n", self.serialize_xrefs_alpha, " </alpha>\n"),
            (" <constructors>\n", self.serialize_xrefs_constructors,
             " </constructors>\n"),
            (" <functions>\n", self.serialize_xrefs_functions,
             " </functions>\n"),
            (" <files>\n", self.serialize_xrefs_files, " </files>\n"),
            (" <index>\n", self.serialize_xrefs_index, " </index>\n"),
        )
        for opening, writer, closing in sections:
            output.write(opening)
            writer(output)
            output.write(closing)

    def serialize(self):
        """Write "<name>-api.xml" and "<name>-refs.xml" in the current dir.

        Both files are written inside `with` blocks so they are closed
        even when a serializer raises.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
| |
| |
def rebuild():
    """Guess which project is being built from the files around the
    current directory, then scan, analyze and serialize it.

    The first probe file that exists selects the project.  When
    "../libexslt/exslt.c" also exists, libexslt is built as an extra
    step.  Returns the main builder, or None when no project matched.
    """
    libxml2_excludes = ["xmlwin32version.h", "tst.c"]
    candidates = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."], libxml2_excludes),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"], libxml2_excludes),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"], ["win32config.h", "libxslt.h", "tst.c"]),
    )

    builder = None
    for probe, banner, project, directories, excludes in candidates:
        if glob.glob(probe) != []:
            print(banner)
            builder = docBuilder(project, directories, list(excludes))
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        for step in (extra.scan, extra.analyze, extra.serialize):
            step()
    return builder
| |
| # |
| # for debugging the parser |
| # |
def parse(filename):
    """Parse a single file with CParser and return whatever its parse()
    method returns (debugging helper)."""
    return CParser(filename).parse()
| |
# Script entry point: with an argument, parse that single file in debug
# mode; without one, rebuild the API description.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])