AppleScript - Parsing XML

I am trying to write an AppleScript to extract values from an .xfdf file.


<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample XFDF annotations file: three rectangle ("square") comments, each with its comment text and a popup. -->
<xfdf xmlns="" xml:space="preserve">
  <annots>
    <square color="#FF0000" creationdate="D:20130828114843+05'30'" date="D:20130828114901+05'30'" flags="print" name="Xi6cOkAWgWHcAhpfBkR5A7" page="0" rect="347.7599999991828,1041.8400000004283,453.5999999989341,1056.9600000003927" subject="Rectangle" title="1 im, awltest7 (AWLTEST7.IM)">
      <contents-richtext>
        <body>
          <p>Text Not Clear</p>
        </body>
      </contents-richtext>
      <popup open="yes" page="0" rect="453.5999999989341,944.4600000003926,573.5999999989341,1056.9600000003927" />
    </square>
    <square color="#FF0000" creationdate="D:20130828114910+05'30'" date="D:20130828114919+05'30'" flags="print" name="ptmmBKtfoDEbVzirMgZLnY" page="0" rect="511.1999999987987,1092.960000000308,550.7999999987057,1123.9200000002352" subject="Rectangle" title="2 im, awltest7 (AWLTEST7.IM)">
      <contents-richtext>
        <body>
          <p>Incorrect dimension</p>
        </body>
      </contents-richtext>
      <popup open="yes" page="0" rect="550.7999999987057,1011.4200000002352,670.7999999987056,1123.9200000002352" />
    </square>
    <square color="#FF0000" creationdate="D:20130828114956+05'30'" date="D:20130828115004+05'30'" flags="print" name="8LaAl2Upx4LEaQptQKXoZx" page="0" rect="355.67999999916424,731.5200000011573,431.99999999898483,750.2400000011135" subject="Rectangle" title="3 im, awltest7 (AWLTEST7.IM)">
      <contents-richtext>
        <body>
          <p>Incorrect Text</p>
        </body>
      </contents-richtext>
      <popup open="yes" page="0" rect="431.99999999898483,637.7400000011133,551.9999999989849,750.2400000011135" />
    </square>
  </annots>
</xfdf>


-- Ask the user for the .xfdf file and keep its path as a string.
set theXMLFile to ((choose file) as string)

tell application "System Events"
    -- Address the <xfdf> element of the chosen file's XML contents.
    tell XML element "xfdf" of contents of XML file theXMLFile
        -- Requests an XML element "p" directly under <xfdf>; this is the lookup
        -- named in the reported "Can't get XML element "p" of XML element "xfdf"" error.
        set typeText to (value of XML element "p")
        -- Identical lookup to the line above, so nameText would receive the same value as typeText.
        set nameText to (value of XML element "p")
    end tell
end tell

When I run the script I get this error: System Events got an error: Can’t get XML element "p" of XML element "xfdf" of contents of XML file. Is there any way I can extract these values from the XML? The values I want to extract are those of the "popup" and "p" elements.

-- Collect the attribute values of every <square> annotation in a user-chosen XFDF file.
-- Result: coor is a list with one entry per <square>, each entry being the list of
-- that element's attribute values.
set theXMLFile to ((choose file) as string)
tell application "System Events"
    set theXMLFile to XML file theXMLFile
    -- Every <square> element directly under xfdf/annots.
    set loops to XML elements of XML element "annots" of XML element "xfdf" of theXMLFile whose name is "square"
    set coor to {}
    repeat with i from 1 to (count loops)
        -- item i of loops is already a <square> element, so read its attributes
        -- directly rather than asking it for a nested "square" child.
        set end of coor to value of XML attributes of item i of loops
    end repeat
end tell

Darrick Herwehe
You are directly asking the xfdf element for p, but xfdf is not its parent. You need to dig down into the hierarchy to reach it.

-- Ask for the XFDF file, then gather, for each <square> under xfdf/annots,
-- the text of its <p> (inside contents-richtext/body) and the attribute values of its <popup>.
set xfdfPath to ((choose file) as string)
tell application "System Events"
    set xfdfDoc to XML file xfdfPath
    set squares to XML elements of XML element "annots" of XML element "xfdf" of xfdfDoc whose name is "square"
    set commentTexts to {}
    set popupAttrs to {}
    repeat with idx from 1 to (count squares)
        -- Bind the current <square> once and drill down from it for both lookups.
        set sq to item idx of squares
        set end of commentTexts to value of XML element "p" of XML element "body" of XML element "contents-richtext" of sq
        set end of popupAttrs to value of XML attributes of XML element "popup" of sq
    end repeat
end tell
commentTexts --> {"Text Not Clear", "Incorrect dimension", "Incorrect Text"}
popupAttrs --> {{"yes", "0", "453.5999999989341,944.4600000003926,573.5999999989341,1056.9600000003927"}, {"yes", "0", "550.7999999987057,1011.4200000002352,670.7999999987056,1123.9200000002352"}, {"yes", "0", "431.99999999898483,637.7400000011133,551.9999999989849,750.2400000011135"}}

Another option is to use the XML Tools Scripting Addition from Late Night Software.

I have written a recursive helper function that can solve this.

It searches the xml looking for a particular name and returns a list of XML Elements you can then iterate over to get your values.

The benefit of this is you don't need to know the absolute paths to an Element up front.


-- Let the user pick an XML file, log how many <p> nodes it contains along with
-- each one's value, then log how many <popup> nodes it contains.
set xfdfPath to ((choose file) as string)

tell application "System Events"
    set xmlDoc to XML file xfdfPath

    -- Search the whole document for <p> elements, wherever they are nested.
    set pNodes to my getXMLElementsByName("p", contents of xmlDoc)
    set pCount to count of pNodes
    log ("Found " & pCount & " <p> nodes")

    -- Log each <p> value on its own line, tab-indented and wrapped in double quotes.
    repeat with idx from 1 to pCount
        set pText to (the value of (item idx of pNodes) as text)
        log (tab & quote & pText & quote)
    end repeat

    -- Same search for <popup> elements; only the count is reported.
    set popupNodes to my getXMLElementsByName("popup", contents of xmlDoc)
    log ("Found " & (count of popupNodes) & " <popup> nodes")
end tell

-- Recursively collect every XML element named search_name found beneath search_xml_element.
--   search_name:        element name to match
--   search_xml_element: System Events XML object whose child elements are searched
-- Returns a list of the matching XML elements; a matching element is listed
-- before any matches found among its own descendants.
on getXMLElementsByName(search_name, search_xml_element)
    set hits to {}

    using terms from application "System Events"
        tell search_xml_element
            repeat with idx from 1 to (count of XML elements)
                -- Fetch the child once and reuse it for the name test and the descent.
                set childNode to XML element idx

                if (name of childNode is search_name) then set end of hits to childNode

                -- Descend only into children that have sub-elements of their own.
                if ((count of XML elements of childNode) > 0) then
                    set hits to hits & (my getXMLElementsByName(search_name, childNode))
                end if
            end repeat
        end tell
    end using terms from

    return hits
end getXMLElementsByName

