Reputation: 23
I am trying to parse a UTF-8 xml file using SAX parser and i used the parser but it results an exception it's message "Expecting an element"
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<config>
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath>
<filename>anonymous22242.3gp</filename>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:06</timestamp>
<note>test1</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لول</note>
</annotation>
<annotation>
<file>anonymous22242.3gp</file>
<timestamp>0:09</timestamp>
<note>لولو</note>
</annotation>
</config>
private static String fileDirectory;
private final static ArrayList<String> allFileNames = new ArrayList<String>();
private final static ArrayList<String[]> allAnnotations = new ArrayList<String[]>();
private static String[] currentAnnotation = new String[3];
public static void main(String[] args) {
// TODO Auto-generated method stub
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser playbackParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
boolean audioFullPath = false;
boolean audioName = false;
boolean annotationFile = false;
boolean annotationTimestamp = false;
boolean annotationNote = false;
public void startElement(String uri, String localName,
String qName, Attributes attributes)
throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equalsIgnoreCase("filepath")) {
audioFullPath = true;
}
if (qName.equalsIgnoreCase("filename")) {
audioName = true;
}
if (qName.equalsIgnoreCase("file")) {
annotationFile = true;
}
if (qName.equalsIgnoreCase("timestamp")) {
annotationTimestamp = true;
}
if (qName.equalsIgnoreCase("note")) {
annotationNote = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName);
}
public void characters(char ch[], int start, int length)
throws SAXException {
if (audioFullPath) {
String filePath = new String(ch, start, length);
System.out.println("Full Path : " + filePath);
fileDirectory = filePath;
audioFullPath = false;
}
if (audioName) {
String fileName = new String(ch, start, length);
System.out.println("File Name : " + fileName);
allFileNames.add(fileName);
audioName = false;
}
if (annotationFile) {
String fileName = new String(ch, start, length);
currentAnnotation[0] = fileName;
annotationFile = false;
}
if (annotationTimestamp) {
String timestamp = new String(ch, start, length);
currentAnnotation[1] = timestamp;
annotationTimestamp = false;
}
if (annotationNote) {
String note = new String(ch, start, length);
currentAnnotation[2] = note;
annotationNote = false;
allAnnotations.add(currentAnnotation);
}
}
};
InputStream inputStream = getStream("http://www.example.com/example.xml");
Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");
InputSource xmlSource = new InputSource(xmlReader);
xmlSource.setEncoding("UTF-8");
playbackParser.parse(xmlSource, handler);
System.out.println(fileDirectory);
System.out.println(allFileNames);
System.out.println(allAnnotations);
} catch (Exception e) {
e.printStackTrace();
}
}
}
public Static InputStream getStream(String url)
{
try
{
connection = getConnection(url);
connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles"));
connection.setRequestProperty("Connection", "Keep-Alive");
connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8");
inputStream = connection.openInputStream();
return inputStream;
}
catch(Exception e)
{
System.out.println("NNNNNNN "+e.getMessage());
return null;
}
}
public HttpConnection getConnection(String url)
{
try
{
connection = (HttpConnection) Connector.open(url+getConnectionString());
}
catch(Exception e)
{
}
return connection;
}
but when i pass to the parse method the inputStream instead of inputSource it parses the file but still have a problem with Arabic characters between
playbackParser.parse(inputStream, handler);
Upvotes: 0
Views: 1097
Reputation: 596352
The XML you showed has unencoded Arabic characters in it. That is in violation of the XML's declared Encoding, which means the XML is malformed. A SAX parser processes data piece by piece sequentially, triggering events for each piece. It will not detect such an encoding error until it reaches the piece that contains those erroneous characters. There is nothing you can do about that. The XML needs to be fixed by its original author.
Upvotes: 1