Sandeep Yadav
Sandeep Yadav

Reputation: 179

Why is serde_json so slow when deserializing untagged enums in Rust?

When running code:

#![allow(unused)]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use std::time::Instant;

/// A scalar value that is either a string, a signed 64-bit integer, or a
/// 64-bit float.
///
/// The enum is marked `#[serde(untagged)]`, so no variant name is expected in
/// the data. Per serde's documentation for untagged enums, the derived
/// `Deserialize` tries the variants in declaration order and keeps the first
/// one that succeeds, which is why the order `String`, `Int`, `Float` matters.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(untagged)]
enum NumberOrString {
    /// A string value, e.g. `"5"`.
    String(String),
    /// An integer value that fits in `i64`, e.g. `42`.
    Int(i64),
    /// A floating-point value, e.g. `1.5`.
    Float(f64),
}

/// Deserializes the same JSON document twice -- first into
/// `serde_json::Value` elements, then into `NumberOrString` elements -- and
/// prints how long each single run took.
fn main() {
    // One large array of integers plus two short arrays of strings.
    let document = r#"{
        "17594136111": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499],
        "0000000017704043101": ["5", "7"],
        "features": ["a1"]
    }"#;

    // First run: elements are parsed as the general-purpose `serde_json::Value`.
    let value_timer = Instant::now();
    let as_values = serde_json::from_str::<HashMap<&str, Vec<serde_json::Value>>>(document)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", value_timer.elapsed());

    // Second run: elements are parsed as the `NumberOrString` enum.
    let enum_timer = Instant::now();
    let as_enum = serde_json::from_str::<HashMap<&str, Vec<NumberOrString>>>(document)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", enum_timer.elapsed());
}

And the output comes as:

$ cargo run 
Compiling rust_tutorial v0.1.0 (/Users/sandeep.yadav/code/codetest/rust/rust_tutorial)
Finished dev [unoptimized + debuginfo] target(s) in 2.26s
Running `target/debug/rust_tutorial`
Elapsed time: 360.78µs
Elapsed time: 2.22ms

$ cargo run --release
Compiling rust_tutorial v0.1.0 (/Users/sandeep.yadav/code/codetest/rust/rust_tutorial)
Finished release [optimized] target(s) in 2.47s
Running `target/release/rust_tutorial`
Elapsed time: 74.82µs
Elapsed time: 439.90µs

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 63.13µs
Elapsed time: 354.89µs

Why is deserializing into the untagged enum so slow compared to deserializing into serde_json::Value, which is itself an enum?

serde_json::Value covers much more than String, i64 and f64: it also has Null, Bool, Array and Object variants. I am actually reducing the set of acceptable values, yet the time taken increases by at least 5 times.

Are there any alternatives I can use to achieve the same result?

Upvotes: 4

Views: 1899

Answers (1)

Wizard.Ritvik
Wizard.Ritvik

Reputation: 11642

I implemented a custom Visitor for the NumberOrString enum -- which, as @Chayim correctly mentions, is how serde_json implements Deserialize for Value here -- and removed the default #[derive(Deserialize)]. With that change the deserialization times are much improved, as shown below.

#![allow(unused)]

use std::collections::HashMap;
use std::fmt;
use std::time::Instant;

use serde::de::{Error, Visitor};
use serde::{Deserialize, Deserializer, Serialize};

#[derive(Serialize, Debug, Clone, PartialEq)]
// note: it appears that an "untagged enum" is not needed anymore
// #[serde(untagged)]
enum NumberOrString {
    String(String),
    Int(i64),
    Float(f64),
}

impl<'de> Deserialize<'de> for NumberOrString {
    #[inline]
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        use crate::NumberOrString::*;

        struct NumberOrStringVisitor;

        impl<'de> Visitor<'de> for NumberOrStringVisitor {
            type Value = NumberOrString;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a number or string")
            }

            #[inline]
            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Int(v))
            }

            #[inline]
            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Int(v as i64))
            }

            #[inline]
            fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Float(v))
            }

            #[inline]
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(String(v.to_owned()))
            }

            #[inline]
            fn visit_string<E>(self, v: std::string::String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(String(v))
            }
        }

        deserializer.deserialize_any(NumberOrStringVisitor)
    }
}

/// Deserializes the same JSON document twice -- first into
/// `serde_json::Value` elements, then into `NumberOrString` elements -- and
/// prints how long each single run took.
fn main() {
    // One large array of integers plus two short arrays of strings.
    let document = r#"{
        "17594136111": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499],
        "0000000017704043101": ["5", "7"],
        "features": ["a1"]
    }"#;

    // First run: elements are parsed as the general-purpose `serde_json::Value`.
    let value_timer = Instant::now();
    let as_values = serde_json::from_str::<HashMap<&str, Vec<serde_json::Value>>>(document)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", value_timer.elapsed());

    // Second run: elements are parsed as the `NumberOrString` enum.
    let enum_timer = Instant::now();
    let as_enum = serde_json::from_str::<HashMap<&str, Vec<NumberOrString>>>(document)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", enum_timer.elapsed());
}

My times (on my Windows 11 PC) were as follows:

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 286.00µs
Elapsed time: 20.50µs

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 303.90µs
Elapsed time: 24.00µs

Upvotes: 3

Related Questions