blob: 37883aba42e30772d27a126c0e86e56dc54c95c2 [file] [log] [blame]
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /><link rel="SHORTCUT ICON" href="/favicon.ico" /><style type="text/css">
TD {font-family: Verdana,Arial,Helvetica}
BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em}
H1 {font-family: Verdana,Arial,Helvetica}
H2 {font-family: Verdana,Arial,Helvetica}
H3 {font-family: Verdana,Arial,Helvetica}
A:link, A:visited, A:active { text-decoration: underline }
</style><title>A real example</title></head><body bgcolor="#8b7765" text="#000000" link="#a06060" vlink="#000000"><table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr><td width="120"><a href="http://swpat.ffii.org/"><img src="epatents.png" alt="Action against software patents" /></a></td><td width="180"><a href="http://www.gnome.org/"><img src="gnome2.png" alt="Gnome2 Logo" /></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo" /></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo" /></a><div align="left"><a href="http://xmlsoft.org/"><img src="Libxml2-Logo-180x168.gif" alt="Made with Libxml2 Logo" /></a></div></td><td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"><h1>The XML C parser and toolkit of Gnome</h1><h2>A real example</h2></td></tr></table></td></tr></table></td></tr></table><table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr><td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Developer Menu</b></center></td></tr><tr><td bgcolor="#fffacd"><form action="search.php" enctype="application/x-www-form-urlencoded" method="get"><input name="query" type="text" size="20" value="" /><input name="submit" type="submit" value="Search ..." 
/></form><ul><li><a href="index.html" style="font-weight:bold">Main Menu</a></li><li><a href="html/index.html" style="font-weight:bold">Reference Manual</a></li><li><a href="examples/index.html" style="font-weight:bold">Code Examples</a></li><li><a href="guidelines.html">XML Guidelines</a></li><li><a href="tutorial/index.html">Tutorial</a></li><li><a href="xmlreader.html">The Reader Interface</a></li><li><a href="ChangeLog.html">ChangeLog</a></li><li><a href="XSLT.html">XSLT</a></li><li><a href="python.html">Python and bindings</a></li><li><a href="architecture.html">libxml2 architecture</a></li><li><a href="tree.html">The tree output</a></li><li><a href="interface.html">The SAX interface</a></li><li><a href="xmlmem.html">Memory Management</a></li><li><a href="xmlio.html">I/O Interfaces</a></li><li><a href="library.html">The parser interfaces</a></li><li><a href="entities.html">Entities or no entities</a></li><li><a href="namespaces.html">Namespaces</a></li><li><a href="upgrade.html">Upgrading 1.x code</a></li><li><a href="threads.html">Thread safety</a></li><li><a href="DOM.html">DOM Principles</a></li><li><a href="example.html">A real example</a></li><li><a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="APIchunk0.html">Alphabetic</a></li><li><a href="APIconstructors.html">Constructors</a></li><li><a href="APIfunctions.html">Functions/Types</a></li><li><a href="APIfiles.html">Modules</a></li><li><a href="APIsymbols.html">Symbols</a></li></ul></td></tr></table><table width="100%" border="0" cellspacing="1" cellpadding="3"><tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr><tr><td bgcolor="#fffacd"><ul><li><a href="http://mail.gnome.org/archives/xml/">Mail 
archive</a></li><li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li><li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li><li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li><li><a href="ftp://xmlsoft.org/">FTP</a></li><li><a href="http://www.zlatkovic.com/projects/libxml/">Windows binaries</a></li><li><a href="http://www.blastwave.org/packages.php/libxml2">Solaris binaries</a></li><li><a href="http://www.explain.com.au/oss/libxml2xslt.html">MacOsX binaries</a></li><li><a href="http://libxmlplusplus.sourceforge.net/">C++ bindings</a></li><li><a href="http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4">PHP bindings</a></li><li><a href="http://sourceforge.net/projects/libxml2-pas/">Pascal bindings</a></li><li><a href="http://libxml.rubyforge.org/">Ruby bindings</a></li><li><a href="http://tclxml.sourceforge.net/">Tcl bindings</a></li><li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml2">Bug Tracker</a></li></ul></td></tr></table></td></tr></table></td><td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"><p>Here is a real size example, where the actual content of the
application data is not kept in the DOM tree but uses internal structures. It
is based on a proposal to keep a database of jobs related to Gnome, with an
XML based storage structure. Here is an <a href="gjobs.xml">XML encoded
jobs base</a>:</p><pre>&lt;?xml version="1.0"?&gt;
&lt;gjob:Helping xmlns:gjob="http://www.gnome.org/some-location"&gt;
&lt;gjob:Jobs&gt;
&lt;gjob:Job&gt;
&lt;gjob:Project ID="3"/&gt;
&lt;gjob:Application&gt;GBackup&lt;/gjob:Application&gt;
&lt;gjob:Category&gt;Development&lt;/gjob:Category&gt;
&lt;gjob:Update&gt;
&lt;gjob:Status&gt;Open&lt;/gjob:Status&gt;
&lt;gjob:Modified&gt;Mon, 07 Jun 1999 20:27:45 -0400 MET DST&lt;/gjob:Modified&gt;
&lt;gjob:Salary&gt;USD 0.00&lt;/gjob:Salary&gt;
&lt;/gjob:Update&gt;
&lt;gjob:Developers&gt;
&lt;gjob:Developer&gt;
&lt;/gjob:Developer&gt;
&lt;/gjob:Developers&gt;
&lt;gjob:Contact&gt;
&lt;gjob:Person&gt;Nathan Clemons&lt;/gjob:Person&gt;
&lt;gjob:Email&gt;nathan@windsofstorm.net&lt;/gjob:Email&gt;
&lt;gjob:Company&gt;
&lt;/gjob:Company&gt;
&lt;gjob:Organisation&gt;
&lt;/gjob:Organisation&gt;
&lt;gjob:Webpage&gt;
&lt;/gjob:Webpage&gt;
&lt;gjob:Snailmail&gt;
&lt;/gjob:Snailmail&gt;
&lt;gjob:Phone&gt;
&lt;/gjob:Phone&gt;
&lt;/gjob:Contact&gt;
&lt;gjob:Requirements&gt;
The program should be released as free software, under the GPL.
&lt;/gjob:Requirements&gt;
&lt;gjob:Skills&gt;
&lt;/gjob:Skills&gt;
&lt;gjob:Details&gt;
A GNOME based system that will allow a superuser to configure
compressed and uncompressed files and/or file systems to be backed
up with a supported media in the system. This should be able to
perform via find commands generating a list of files that are passed
to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine
or via operations performed on the filesystem itself. Email
notification and GUI status display very important.
&lt;/gjob:Details&gt;
&lt;/gjob:Job&gt;
&lt;/gjob:Jobs&gt;
&lt;/gjob:Helping&gt;</pre><p>While loading the XML file into an internal DOM tree is a matter of calling
only a couple of functions, browsing the tree to gather the data and generate
the internal structures is harder, and more error prone.</p><p>The suggested principle is to be tolerant with respect to the
input structure. For example, the ordering of the attributes is not
significant; the XML specification is clear about it. It's also usually a good
idea not to depend on the order of the children of a given node, unless it
really makes things harder. Here is some code to parse the information for a
person:</p><pre>/*
* A person record
*/
typedef struct person {
char *name;
char *email;
char *company;
char *organisation;
char *smail;
char *webPage;
char *phone;
} person, *personPtr;
/*
* And the code needed to parse it
*/
personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
personPtr ret = NULL;
DEBUG("parsePerson\n");
/*
* allocate the struct
*/
ret = (personPtr) malloc(sizeof(person));
if (ret == NULL) {
fprintf(stderr,"out of memory\n");
return(NULL);
}
memset(ret, 0, sizeof(person));
/* We don't care what the top level element name is */
cur = cur-&gt;xmlChildrenNode;
while (cur != NULL) {
if ((!strcmp(cur-&gt;name, "Person")) &amp;&amp; (cur-&gt;ns == ns))
ret-&gt;name = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
if ((!strcmp(cur-&gt;name, "Email")) &amp;&amp; (cur-&gt;ns == ns))
ret-&gt;email = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
cur = cur-&gt;next;
}
return(ret);
}</pre><p>Here are a couple of things to notice:</p><ul><li>Usually a recursive parsing style is the more convenient one: XML
data is by nature subject to repetitive constructs and usually exhibits
highly structured patterns.</li>
<li>The two arguments of type <em>xmlDocPtr</em> and <em>xmlNsPtr</em>, i.e.
the pointer to the global XML document and the namespace reserved to the
application. Document-wide information is needed, for example, to decode
entities, and it's a good coding practice to define a namespace for your
application's set of data and test that the elements and attributes you're
analyzing actually pertain to your application space. This is done by a
simple equality test (cur-&gt;ns == ns).</li>
<li>To retrieve text and attribute values, you can use the
function <em>xmlNodeListGetString</em> to gather all the text and entity
reference nodes generated by the DOM output and produce a single text
string.</li>
</ul><p>Here is another piece of code used to parse another level of
the structure:</p><pre>#include &lt;libxml/tree.h&gt;
/*
* a Description for a Job
*/
typedef struct job {
char *projectID;
char *application;
char *category;
personPtr contact;
int nbDevelopers;
personPtr developers[100]; /* using dynamic alloc is left as an exercise */
} job, *jobPtr;
/*
* And the code needed to parse it
*/
jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) {
jobPtr ret = NULL;
DEBUG("parseJob\n");
/*
* allocate the struct
*/
ret = (jobPtr) malloc(sizeof(job));
if (ret == NULL) {
fprintf(stderr,"out of memory\n");
return(NULL);
}
memset(ret, 0, sizeof(job));
/* We don't care what the top level element name is */
cur = cur-&gt;xmlChildrenNode;
while (cur != NULL) {
if ((!strcmp(cur-&gt;name, "Project")) &amp;&amp; (cur-&gt;ns == ns)) {
ret-&gt;projectID = xmlGetProp(cur, "ID");
if (ret-&gt;projectID == NULL) {
fprintf(stderr, "Project has no ID\n");
}
}
if ((!strcmp(cur-&gt;name, "Application")) &amp;&amp; (cur-&gt;ns == ns))
ret-&gt;application = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
if ((!strcmp(cur-&gt;name, "Category")) &amp;&amp; (cur-&gt;ns == ns))
ret-&gt;category = xmlNodeListGetString(doc, cur-&gt;xmlChildrenNode, 1);
if ((!strcmp(cur-&gt;name, "Contact")) &amp;&amp; (cur-&gt;ns == ns))
ret-&gt;contact = parsePerson(doc, ns, cur);
cur = cur-&gt;next;
}
return(ret);
}</pre><p>Once you are used to it, writing this kind of code is quite simple,
but boring. Ultimately, it could be possible to write stubbers taking either
C data structure definitions, a set of XML examples or an XML DTD and
produce the code needed to import and export the content between C data and
XML storage. This is left as an exercise to the reader :-)</p><p>Feel free to use <a href="example/gjobread.c">the code for the full
C parsing example</a> as a template; it is also available with a Makefile in
the Gnome CVS base under gnome-xml/example</p><p><a href="bugs.html">Daniel Veillard</a></p></td></tr></table></td></tr></table></td></tr></table></td></tr></table></td></tr></table></body></html>