Pranav Ballaney
Pranav Ballaney

Reputation: 315

Deserializing recursive XML using Serde in Rust

I'm trying to deserialize MathML using Serde and Quick-XML in Rust. I'm having trouble trying to write the structs because of the recursive nature of MathML. Here's a minimal, reproducible example:

use quick_xml;
use serde::Deserialize;

// Recursive model of a MathML tree: every element variant wraps the list of
// its child nodes, and `Text` holds character data.
// NOTE(review): these are newtype variants over `Vec<MathNode>`; this is the
// definition that overflows the stack when deserialized with quick-xml.
#[derive(Debug, Deserialize)]
#[serde(rename = "math", rename_all = "camelCase")]
pub enum MathNode {
    // `<apply>` element and its children.
    Apply(Vec<MathNode>),
    // `<ci>` element and its children.
    Ci(Vec<MathNode>),
    // Text content.
    Text(String),
    // Document root, explicitly renamed to match the `<math>` tag.
    #[serde(rename = "math")]
    Root(Vec<MathNode>),
}

// Deserializes the XML text in `doc` into a `MathNode` tree using quick-xml's
// serde integration. Any deserialization error is handed back unchanged.
pub fn parse(doc: &str) -> Result<MathNode, quick_xml::DeError> {
    quick_xml::de::from_str::<MathNode>(doc)
}

// Parses a small MathML sample and prints the `Result` with its Debug
// representation, so both a parsed tree and an error are visible.
pub fn main() {
    let document = "<math>
                    <apply>
                        <ci type=\"integer\">5</ci>
                    </apply>
                </math>";
    println!("{:?}", parse(document));
}

There's a stack overflow, probably due to an infinite loop as suggested here and here. I tried to implement their suggestions but to no avail.

Upvotes: 8

Views: 6521

Answers (2)

Mingun
Mingun

Reputation: 179

@Pranav Ballaney, it's strange that you didn't create an issue on the quick-xml bug tracker, so I opened one. Yes, currently code that uses newtype variants does not work. This is because a newtype is completely transparent in XML: it just recursively calls the deserialization of the inner type using the same deserializer. However, it is possible to deserialize your example if you change the MathNode definition slightly and use a struct variant with a single $value field.

// Checks that a recursive MathML document deserializes when every element
// variant is a struct variant holding its children in a single `$value` field.
#[test]
fn recursive() {
    #[derive(Debug, Deserialize, PartialEq)]
    #[serde(rename_all = "camelCase")]
    enum MathNode {
        Apply {
            #[serde(rename = "$value")]
            value: Vec<MathNode>,
        },
        Ci {
            #[serde(rename = "$value")]
            value: Vec<MathNode>,
        },
        #[serde(rename = "$text")]
        Text(String),
        #[serde(rename = "math")]
        Root {
            #[serde(rename = "$value")]
            value: Vec<MathNode>,
        },
    }

    let xml = r#"
    <math>
        <apply>
            <ci type="integer">5</ci>
        </apply>
    </math>"#;

    let parsed: MathNode = quick_xml::de::from_str(xml).unwrap();

    // Build the expected tree from the innermost node outwards.
    let five = MathNode::Text("5".into());
    let ci = MathNode::Ci { value: vec![five] };
    let apply = MathNode::Apply { value: vec![ci] };
    let expected = MathNode::Root { value: vec![apply] };

    assert_eq!(parsed, expected);
}

Upvotes: 0

kometen
kometen

Reputation: 7852

Here is an example of how to parse a nested XML structure using serde. This code is based on this answer. The sample XML has nodes called measuredValue that are placed into a Vec<structs::Data>.

Cargo.toml:

[dependencies]
serde_derive = "1.0"
serde = "1.0"
serde-xml-rs = "0.4"
serde_json = "1.0.64"

structs.rs:

// XML root: the <d2LogicalModel> element of sample.xml.
// Only the <payloadPublication> child is modelled here.
#[derive(Deserialize, Debug)]
pub(crate) struct D2LogicalModel {
    pub(crate) payloadPublication: PayloadPublication,
}

// <payloadPublication>: publication timestamp plus the per-site measurements.
#[derive(Deserialize, Debug)]
pub(crate) struct PayloadPublication {
    // `lang` is an attribute in sample.xml (lang="nob"); private and not read by main.rs.
    lang: String,
    pub(crate) publicationTime: PublicationTime,
    // A Vec, so the element may appear more than once.
    pub(crate) siteMeasurements: Vec<SiteMeasurements>,
}

// <publicationTime>: wrapper around the element's text content.
#[derive(Deserialize, Debug)]
pub(crate) struct PublicationTime {
    // `$value` maps the element's text; kept as the raw string, not parsed as a date.
    #[serde(rename = "$value")]
    pub(crate) publicationTime: String,
}

// <siteMeasurements>: one measurement site; the various weather measurements
// are nested below it (sub-root).
#[derive(Deserialize, Debug)]
pub(crate) struct SiteMeasurements {
    pub(crate) measurementSiteReference: MeasurementSiteReference,
    pub(crate) measurementTimeDefault: MeasurementTimeDefault,
    // `default` yields an empty Vec when the site has no <measuredValue> children.
    #[serde(default)]
    pub(crate) measuredValue: Vec<MeasuredValue_>,
}

// <measurementSiteReference>: identifies the site. In sample.xml all three
// fields are attributes of a self-closing element.
#[derive(Deserialize, Debug)]
pub(crate) struct MeasurementSiteReference {
    // The only field main.rs reads; copied into WeatherMeasurement::id.
    pub(crate) id: u16,
    targetClass: String,
    version: u16,
}

// <measurementTimeDefault>: wrapper around the element's text content.
#[derive(Deserialize, Debug)]
pub(crate) struct MeasurementTimeDefault {
    // `$value` maps the element's text; kept as the raw timestamp string.
    #[serde(rename = "$value")]
    pub(crate) measurementTimeDefault: String,
}

// Outer <measuredValue index="..."> element, common for all measurements.
// The trailing underscore avoids a clash with the struct for the inner
// <measuredValue> element of the same name.
#[derive(Deserialize, Debug)]
pub(crate) struct MeasuredValue_ {
    // The `index` attribute of the outer element.
    pub(crate) index: u16,
    // The nested <measuredValue> element.
    pub(crate) measuredValue: MeasuredValue,
}

// Inner <measuredValue> element; it only wraps <basicData>.
#[derive(Deserialize, Debug)]
pub(crate) struct MeasuredValue {
    pub(crate) basicData: BasicData,
}

// <basicData>: split based on type of measurement. Below this point the tree
// is different for each kind. Every branch is `#[serde(default)]`, so a branch
// whose element is absent is filled with its `Default` value instead of
// failing deserialization. In sample.xml each <basicData> contains exactly
// one of the three.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct BasicData {
    #[serde(default)]
    pub(crate) precipitationDetail: PrecipitationDetail,
    #[serde(default)]
    pub(crate) wind: Wind,
    #[serde(default)]
    pub(crate) temperature: Temperature_, // Underscore added since the name collides with the `Temperature` struct
}

// <precipitationDetail>: wrapper around <precipitationIntensity>.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct PrecipitationDetail {
    #[serde(default)]
    pub(crate) precipitationIntensity: PrecipitationIntensity,
}

// <precipitationIntensity>: precipitation reading plus a synthetic label.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct PrecipitationIntensity {
    // Not present in the XML. When this struct is deserialized, serde fills
    // the field from `precipitation_intensity()`. When a parent is absent and
    // built via the derived `Default`, the field stays an empty string;
    // main.rs uses that emptiness to skip the reading.
    #[serde(default = "precipitation_intensity")]
    pub(crate) field_description: String,
    #[serde(default)]
    pub(crate) millimetresPerHourIntensity: MillimetresPerHourIntensity,
}

// <millimetresPerHourIntensity>: numeric text content of the element.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct MillimetresPerHourIntensity {
    // `$value` maps the element's text, parsed as f32 (e.g. "0.0" in sample.xml).
    #[serde(rename = "$value")]
    pub(crate) millimetresPerHourIntensity: f32,
}

// Innermost <temperature> element (inside <airTemperature>): numeric text content.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct Temperature {
    // `$value` maps the element's text, parsed as f32 (e.g. "0.2" in sample.xml).
    #[serde(rename = "$value")]
    pub(crate) temperature: f32,
}

// <wind>: wrapper around <windSpeed>.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct Wind {
    #[serde(default)]
    pub(crate) windSpeed: WindSpeed,
}

// <windSpeed>: wind speed reading plus a synthetic label.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct WindSpeed {
    // Not present in the XML. Filled from `wind_speed()` when this struct is
    // deserialized; left empty when a parent is built via the derived
    // `Default`. main.rs skips readings whose description is empty.
    #[serde(default = "wind_speed")]
    pub(crate) field_description: String,
    #[serde(default)]
    pub(crate) speed: Speed,
}

// <speed>: numeric text content of the element.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct Speed {
    // `$value` maps the element's text, parsed as f32 (e.g. "21.24" in sample.xml).
    #[serde(rename = "$value")]
    pub(crate) speed: f32,
}

// Outer <temperature> element (directly under <basicData>): wrapper around
// <airTemperature>. The trailing underscore avoids a clash with the
// `Temperature` struct used for the innermost <temperature> element.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct Temperature_ {
    #[serde(default)]
    pub(crate) airTemperature: AirTemperature,
}

// <airTemperature>: air temperature reading plus a synthetic label.
#[derive(Deserialize, Debug, Default)]
pub(crate) struct AirTemperature {
    // Not present in the XML. Filled from `air_temperature()` when this struct
    // is deserialized; left empty when a parent is built via the derived
    // `Default`. main.rs skips readings whose description is empty.
    #[serde(default = "air_temperature")]
    pub(crate) field_description: String,
    #[serde(default)]
    pub(crate) temperature: Temperature,
}

// Default-value provider referenced by `#[serde(default = "...")]`:
// supplies the label for a precipitation reading.
fn precipitation_intensity() -> String {
    String::from("precipitation_intensity")
}

// Default-value provider referenced by `#[serde(default = "...")]`:
// supplies the label for a wind speed reading.
fn wind_speed() -> String {
    String::from("wind_speed")
}

// Default-value provider referenced by `#[serde(default = "...")]`:
// supplies the label for an air temperature reading.
fn air_temperature() -> String {
    String::from("air_temperature")
}

// Output record serialized to JSON by main.rs: one per <siteMeasurements> element.
#[derive(Serialize)]
pub(crate) struct WeatherMeasurement {
    // Text of the site's <measurementTimeDefault>, copied verbatim.
    pub(crate) measurement_time_default: String,
    // `id` of the site's <measurementSiteReference>.
    pub(crate) id: u16,
    // The readings collected for this site.
    pub(crate) data: Vec<Data>,
}

// A single flattened reading inside a WeatherMeasurement.
#[derive(Serialize)]
pub(crate) struct Data {
    // `index` attribute of the outer <measuredValue> the reading came from.
    pub(crate) index: u16,
    // Label naming the kind of reading (e.g. "wind_speed").
    pub(crate) field_description: String,
    // The numeric value of the reading.
    pub(crate) measurement: f32,
}

main.rs:

#![allow(non_snake_case)]

mod structs;

use std::fs;

#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate serde_xml_rs;

// Reads ./sample.xml, flattens every site's measured values into
// `structs::WeatherMeasurement` records and prints them as one JSON array.
//
// Errors: returns an error (instead of panicking) when the file cannot be
// read, the XML does not deserialize, or JSON serialization fails.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Goodbye XML!");

    let filename = "./sample.xml";
    // Keep the original hint in the error text, but propagate it rather than panic.
    let content = fs::read_to_string(filename)
        .map_err(|e| format!("Unable to read file {}: {}", filename, e))?;

    let model: structs::D2LogicalModel = serde_xml_rs::from_str(&content)?;

    let mut measurements: Vec<structs::WeatherMeasurement> = Vec::new();

    for site in &model.payloadPublication.siteMeasurements {
        // The actual weather data for this site.
        let mut readings: Vec<structs::Data> = Vec::new();

        for measured_value in &site.measuredValue {
            let index = measured_value.index;
            let weather_node = &measured_value.measuredValue.basicData;

            let precipitation = &weather_node.precipitationDetail.precipitationIntensity;
            let wind = &weather_node.wind.windSpeed;
            let air = &weather_node.temperature.airTemperature;

            // (label, value) per measurement kind, in the order they are emitted:
            // precipitationIntensity, windSpeed, airTemperature.
            let candidates = [
                (
                    &precipitation.field_description,
                    precipitation
                        .millimetresPerHourIntensity
                        .millimetresPerHourIntensity,
                ),
                (&wind.field_description, wind.speed.speed),
                (&air.field_description, air.temperature.temperature),
            ];

            // A branch missing from the XML was built via `Default`, which
            // leaves its label empty; only branches with a label are kept.
            readings.extend(
                candidates
                    .iter()
                    .filter(|(description, _)| !description.is_empty())
                    .map(|&(description, measurement)| structs::Data {
                        index,
                        field_description: description.clone(),
                        measurement,
                    }),
            );
        }

        measurements.push(structs::WeatherMeasurement {
            measurement_time_default: site.measurementTimeDefault.measurementTimeDefault.clone(),
            id: site.measurementSiteReference.id,
            data: readings,
        });
    }

    let jm = serde_json::to_string(&measurements)?;
    // Display formatting prints the JSON as-is; `{:?}` would wrap it in quotes
    // and escape every inner quote, producing output that is not valid JSON.
    println!("{}", jm);

    Ok(())
}

sample.xml:

<d2LogicalModel modelBaseVersion="2" xmlns="http://datex2.eu/schema/2/2_0">
    <payloadPublication lang="nob" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="MeasuredDataPublication">
        <publicationTime>2021-03-24T21:02:28.762+01:00</publicationTime>
        <siteMeasurements>
            <measurementSiteReference id="228" targetClass="MeasurementSiteRecord" version="3576"/>
            <measurementTimeDefault>2021-03-24T20:50:00+01:00</measurementTimeDefault>
            <measuredValue index="2501">
                <measuredValue>
                    <basicData xsi:type="PrecipitationInformation">
                        <precipitationDetail>
                            <precipitationIntensity>
                                <millimetresPerHourIntensity>0.0</millimetresPerHourIntensity>
                            </precipitationIntensity>
                        </precipitationDetail>
                    </basicData>
                </measuredValue>
            </measuredValue>
            <measuredValue index="901">
                <measuredValue>
                    <basicData xsi:type="WindInformation">
                        <wind>
                            <windSpeed>
                                <speed>21.24</speed>
                            </windSpeed>
                        </wind>
                    </basicData>
                </measuredValue>
            </measuredValue>
            <measuredValue index="101">
                <measuredValue>
                    <basicData xsi:type="TemperatureInformation">
                        <temperature>
                            <airTemperature>
                                <temperature>0.2</temperature>
                            </airTemperature>
                        </temperature>
                    </basicData>
                </measuredValue>
            </measuredValue>
        </siteMeasurements>
    </payloadPublication>
</d2LogicalModel>

Upvotes: 0

Related Questions