Reputation: 171
I'm using xmlTextReader to process large XML files. Now I need to validate the instance against an XSD schema. The libxml2 API is a little confusing about how this is done.
With my approach, I'm getting the validation errors in the schemaParseErrorHandler function, but without any line numbers or column numbers. How can I get this information?
#include <stdio.h>
#include <libxml/xmlreader.h>
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
static void schemaParseErrorHandler(void *arg, xmlErrorPtr error)
{
fprintf( stderr, "Error at line %d, column %d\n%s",
error->line, error->int2, error->message);
*((bool*)arg) = true;
}
int main( int argc, char **argv )
{
xmlInitParser();
xmlSchemaPtr schema = NULL;
xmlSchemaParserCtxtPtr schema_parser_ctxt = NULL;
int has_schema_errors = 0;
int ret = -1;
xmlSchemaValidCtxtPtr valid_ctxt = NULL;
if ((schema_parser_ctxt = xmlSchemaNewParserCtxt("example.xsd")))
{
schema = xmlSchemaParse(schema_parser_ctxt);
xmlSchemaFreeParserCtxt(schema_parser_ctxt);
if (schema)
{
valid_ctxt = xmlSchemaNewValidCtxt(schema);
}
}
xmlTextReaderPtr reader = NULL;
reader = xmlReaderForFile(filename, RPCXmlStream::STD_ENCODING, 0);
if (reader != NULL)
{
if (valid_ctxt)
{
xmlTextReaderSchemaValidateCtxt(reader, valid_ctxt, 0);
xmlSchemaSetValidStructuredErrors(valid_ctxt, schemaParseErrorHandler, &has_schema_errors);
}
ret = xmlTextReaderRead(reader);
while (ret == 1 && !has_schema_errors)
{
//... procesing informations
ret = xmlTextReaderRead(reader);
}
}
if (ret != 0)
{
xmlErrorPtr err = xmlGetLastError();
TRACE("%s: failed to parse in line %d, col %d. Error %d: %s\n",
err->file,
err->line,
err->int2,
err->code,
err->message);
}
xmlFreeTextReader(reader);
xmlCleanupParser();
return 0;
}
Another try was to use the function
xmlTextReaderSchemaValidate(reader, "example.xsd");
instead of creating a validation context with xmlSchemaNewValidCtxt, but then the program crashes on the first call to xmlTextReaderRead.
So how is validation done correctly, so that the error information includes line and column numbers?
Upvotes: 4
Views: 4991
Reputation: 3461
Your question got me thinking, so I looked in the libxml2 documentation and found:
Structure xmlError
struct _xmlError {
int domain : What part of the library raised this error
int code : The error code, e.g. an xmlParserError
char * message : human-readable informative error message
xmlErrorLevel level : how consequent is the error
char * file : the filename
int line : the line number if available
char * str1 : extra string information
char * str2 : extra string information
char * str3 : extra string information
int int1 : extra number information
int int2 : error column # or 0 if N/A (todo: rename field when we would brk ABI)
void * ctxt : the parser context if available
void * node : the node in the tree
}
where we can see that the xmlErrorPtr returned by the function xmlGetLastError() contains information about the filename, the line number and the column number.
char * file : the filename
int line : the line number if available
...
int int2 : error column
So to test if this was possible or not, here is the code that I used (basically your code with minor changes to make it run on my system):
#include <stdio.h>
#include <stdbool.h>
#include <libxml/xmlreader.h>
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
/*
 * Structured error callback for libxml2 schema validation.
 *
 * arg   - user data registered together with the callback; expected to be
 *         the address of the caller's `int` error flag, which is set to 1.
 * error - the error being reported; its line, column (int2) and message
 *         are written to stderr.
 *
 * NOTE(review): newer libxml2 releases (2.12+) declare this callback type
 * with a `const xmlError *` second parameter; adjust the parameter type if
 * the build warns about an incompatible pointer.
 */
static void schemaParseErrorHandler(void *arg, xmlErrorPtr error)
{
    if (error != NULL)
    {
        fprintf(stderr, "Error at line %d, column %d\n%s",
                error->line, error->int2,
                error->message ? error->message : "");
    }

    /* The flag registered by the caller is an int, so write it as an int;
     * storing through a bool pointer would modify only part of the object
     * and violates the aliasing rules. */
    if (arg != NULL)
    {
        *((int *)arg) = 1;
    }
}
/*
 * Streams /home/junglefox/shiporder.xml through xmlTextReader while
 * validating it against /home/junglefox/shiporder.xsd.
 *
 * Validation errors are delivered to schemaParseErrorHandler, which sets
 * has_schema_errors; reading stops at the first such error. Returns 0 when
 * the whole document was read without parse or validation errors, 1
 * otherwise.
 */
int main( int argc, char **argv )
{
    const char *filename = "/home/junglefox/shiporder.xml";
    xmlSchemaPtr schema = NULL;
    xmlSchemaParserCtxtPtr schema_parser_ctxt = NULL;
    xmlSchemaValidCtxtPtr valid_ctxt = NULL;
    xmlTextReaderPtr reader = NULL;
    int has_schema_errors = 0;
    int ret = -1;

    xmlInitParser();

    /* Compile the schema. The parser context is only needed while parsing;
     * the schema itself must stay alive as long as valid_ctxt uses it. */
    if ((schema_parser_ctxt = xmlSchemaNewParserCtxt("/home/junglefox/shiporder.xsd")))
    {
        schema = xmlSchemaParse(schema_parser_ctxt);
        xmlSchemaFreeParserCtxt(schema_parser_ctxt);
        if (schema)
        {
            valid_ctxt = xmlSchemaNewValidCtxt(schema);
        }
    }

    /* A NULL encoding leaves encoding detection to the parser. */
    reader = xmlReaderForFile(filename, NULL, 0);
    if (reader != NULL)
    {
        if (valid_ctxt)
        {
            /* Attach the validation context first, then install the
             * handler, so the handler set here is the one in effect. */
            xmlTextReaderSchemaValidateCtxt(reader, valid_ctxt, 0);
            xmlSchemaSetValidStructuredErrors(valid_ctxt, schemaParseErrorHandler, &has_schema_errors);
        }
        ret = xmlTextReaderRead(reader);
        while (ret == 1 && !has_schema_errors)
        {
            //... processing information
            ret = xmlTextReaderRead(reader);
        }
    }

    if (ret != 0)
    {
        /* xmlGetLastError() returns NULL when no error has been recorded
         * (for example when the reader could not be created), so it must be
         * checked before use. */
        xmlErrorPtr err = xmlGetLastError();
        if (err != NULL)
        {
            fprintf(stdout, "%s: failed to parse in line %d, col %d. Error %d: %s\n",
                    err->file ? err->file : "(unknown)",
                    err->line,
                    err->int2,
                    err->code,
                    err->message ? err->message : "");
        }
    }

    /* Release in reverse order of dependency: the reader uses valid_ctxt,
     * which in turn uses schema. A caller-supplied validation context is
     * not freed by the reader. All three free functions accept NULL. */
    xmlFreeTextReader(reader);
    xmlSchemaFreeValidCtxt(valid_ctxt);
    xmlSchemaFree(schema);
    xmlCleanupParser();

    return (ret == 0 && !has_schema_errors) ? 0 : 1;
}
where the shiporder.xml and shiporder.xsd files used in that program were copied from the URL and saved locally.
I compiled and ran the code like this:
junglefox@ubuntu:~$ gcc -o test_xsd main.c -I/usr/include/libxml2/ -lxml2 -L/usr/lib/x86_64-linux-gnu/
junglefox@ubuntu:~$ ./test_xsd
junglefox@ubuntu:~$
This time there was no output, as expected, since there were no errors.
If, however, I now introduce an intentional error into the shiporder.xml file, as shown below.
Here is a partial snippet from the buggy shiporder.xml:
<?xml version="1.0" encoding="UTF-8"?>
...
<item>
<title>Hide your heart</title>
<quantity>1</quantity>
price>9.90</price>
</item>
</shiporder>
Notice the missing < before price!
Now I run the program again,
junglefox@ubuntu:~$ ./test_xsd
Error at line 22, column 0
Element 'item': Character content other than whitespace is not allowed because the content type is 'element-only'.
which answers your question(s):
With my approach, I'm getting the validation errors in the schemaParseErrorHandler function, but without any line numbers or column numbers. How can I get this information?
and,
So how is validation done correctly, so that the error information includes line and column numbers?
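as the output clearly shows line number 22, where there was unexpected character content due to the missing <. The column is reported as 0, which, according to the xmlError documentation quoted above, means that the column number is not available for this error.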
Upvotes: 4