Reputation: 5
I think my solution is more complex than it should be, but I'm not familiar enough with Rust yet to know a better way. Below is the only solution I got working after much trial and error.
Each record in the XML below carries a data_type
description, which is stored as a &str or String depending on how the Rust struct holding the records is defined. Assume many more records and many more data types. This XML is not used in the working code.
use quick_xml;
// Sample record description. Each <RecordDesc> describes one field of the
// binary file; <data_type> is the string that selects how the bytes are decoded.
// The document must be well-formed: attributes live inside the opening tag
// (`<length unit="byte">`), and nothing but whitespace may surround the root
// element, so the raw string holds no stray quote characters.
let xml_desc = r#"
<Records>
<RecordDesc>
<name>Single</name>
<number>1</number>
<location unit="byte">0</location>
<data_type>IEEE754LSBSingle</data_type>
<length unit="byte">4</length>
</RecordDesc>
<RecordDesc>
<name>Double</name>
<number>1</number>
<location unit="byte">5</location>
<data_type>IEEE754LSBDouble</data_type>
<length unit="byte">8</length>
</RecordDesc>
</Records>
"#;
// quick_xml/serde used to get that string of xml into the rust records
/// Container for every record description read from the XML document.
// NOTE(review): the sample XML names its child elements `RecordDesc`, while this
// field is called `records`; a serde rename is presumably required — confirm.
#[derive(Serialize, Deserialize, Debug)]
pub struct Records {
/// The individual record descriptions.
pub records: Vec<Record>,
}
/// One record description; the fields not shown are elided with `...`.
#[derive(Serialize, Deserialize, Debug)]
pub struct Record {
...,
// Type name such as "IEEE754LSBSingle" that selects how the record's bytes are decoded.
// NOTE(review): a bare `&str` field needs a lifetime parameter on the struct;
// an owned `String` avoids that.
pub data_type: &str, //could also be String, but this example doesn't use this struct
...,
}
// For each record use data_type to cast into type rust can use
Creates functions that convert the leading bytes of a &mut &[u8] into a specific Rust type. Example output is shown in first_example
below.
/// Generates `fn $func_name(input: &mut &[u8]) -> $output_type`, a reader that
/// decodes one `$output_type` from the front of a byte slice.
///
/// `$endian` is the name of the byte-array constructor to call on the target
/// type, for example `from_le_bytes` or `from_be_bytes`.
///
/// The generated function consumes the bytes it decodes: on return `*input`
/// starts just past the value, so consecutive calls walk through a buffer.
///
/// # Panics
///
/// The generated function panics if `input` holds fewer than
/// `size_of::<$output_type>()` bytes.
macro_rules! type_cast_function {
    ($func_name:ident, $endian:ident, $output_type:ty) => {
        fn $func_name(input: &mut &[u8]) -> $output_type {
            let (value_bytes, rest) = input.split_at(std::mem::size_of::<$output_type>());
            // Advance the caller's cursor. Without this the `&mut` would be
            // pointless and every call would decode the same bytes again.
            *input = rest;
            <$output_type>::$endian(
                value_bytes
                    .try_into()
                    .expect("split_at returned exactly size_of::<T>() bytes"),
            )
        }
    };
}
Creates impl blocks for unwrapping each specific value from the variants in DataTypes. Example output is shown in first_example
below.
/// Generates `impl DataTypes { pub fn $unwrap_name(self) -> $output_type }`,
/// an accessor that extracts the payload of one specific variant.
///
/// * `$unwrap_name` - name of the generated method.
/// * `$variant` - path of the single-field tuple variant to extract from.
/// * `$output_type` - type of that variant's payload.
///
/// # Panics
///
/// The generated method panics when called on any other variant. The panic
/// message names the method and the variant it expected, so a mismatch can be
/// traced without a debugger.
macro_rules! create_unwrap_impl_for_type {
    ($unwrap_name:ident, $variant:path, $output_type:ty) => {
        impl DataTypes {
            pub fn $unwrap_name(self) -> $output_type {
                match self {
                    $variant(val) => val,
                    _ => panic!(
                        "{} called on a value that is not {}",
                        stringify!($unwrap_name),
                        stringify!($variant)
                    ),
                }
            }
        }
    };
}
Note: Case is reflective of the case in the xml_desc
/// A decoded value, tagged with the data type it was decoded as.
/// Variant names use the same spelling and case as the `data_type` strings.
#[derive(Debug)]
pub enum DataTypes {
/// Payload is an `f32` (4 bytes).
IEEE754LSBSingle(f32),
/// Payload is an `f64` (8 bytes).
IEEE754LSBDouble(f64),
}
Matches data_type: &str
descriptions and generates the relevant function and impl block for unwrapping the value for each match to be used elsewhere.
/// Decodes one value per simulated record, using the two single-purpose macros
/// to generate the reader function and the unwrapping method for each type.
fn first_example() {
    // Expands to `impl DataTypes { pub fn unwrap_le_f32(self) -> f32 { .. } }`,
    // which returns the payload of `IEEE754LSBSingle` and panics on any other variant.
    create_unwrap_impl_for_type!(unwrap_le_f32, DataTypes::IEEE754LSBSingle, f32);
    // Expands to `fn read_le_f32(input: &mut &[u8]) -> f32 { .. }`, which builds
    // the value from the first `size_of::<f32>()` bytes via `f32::from_le_bytes`.
    type_cast_function!(read_le_f32, from_le_bytes, f32);

    // Same pair of helpers for the 8-byte type:
    // `DataTypes::unwrap_le_f64(self) -> f64` and `read_le_f64(&mut &[u8]) -> f64`.
    create_unwrap_impl_for_type!(unwrap_le_f64, DataTypes::IEEE754LSBDouble, f64);
    type_cast_function!(read_le_f64, from_le_bytes, f64);

    // Stand-ins for bytes that would come from parsing the binary file.
    let mut single_bytes: &[u8] = &[172, 152, 111, 195];
    let mut double_bytes: &[u8] = &[172, 152, 111, 195, 117, 93, 133, 192];

    // Stand-in for looping over records of different types.
    for type_name in ["IEEE754LSBSingle", "IEEE754LSBDouble"] {
        if type_name == "IEEE754LSBSingle" {
            let wrapped = DataTypes::IEEE754LSBSingle(read_le_f32(&mut single_bytes));
            let single = wrapped.unwrap_le_f32();
            println!("First Example\tIEEE754LSBSingle {:?}", single);
        } else if type_name == "IEEE754LSBDouble" {
            let wrapped = DataTypes::IEEE754LSBDouble(read_le_f64(&mut double_bytes));
            let double = wrapped.unwrap_le_f64();
            println!("First Example\tIEEE754LSBDouble {:?}", double);
        } else {
            panic!();
        }
    }
}
One macro that creates both the function and the impl block by calling the other two macros. This is the only difference between the first_example
above and the second_example
below.
/// Emits both helpers for one data type in a single invocation: the byte
/// reader (through `type_cast_function!`) and the variant unwrapper (through
/// `create_unwrap_impl_for_type!`).
///
/// Arguments, in order: reader function name, byte-conversion function name,
/// unwrapper method name, enum variant path, Rust value type.
macro_rules! generate_casting_extraction_functions {
    (
        $reader:ident,
        $conversion:ident,
        $unwrapper:ident,
        $variant:path,
        $value_type:ty
    ) => {
        type_cast_function!($reader, $conversion, $value_type);
        create_unwrap_impl_for_type!($unwrapper, $variant, $value_type);
    };
}
Matches data_type: &str
descriptions and generates the relevant function and impl block for unwrapping the value for each match to be used elsewhere.
/// Same walk-through as `first_example`, but each reader/unwrapper pair is
/// generated by the single combined macro.
fn second_example() {
    // Generates `read_le_f32_2` and `DataTypes::unwrap_le_f32_2`.
    generate_casting_extraction_functions!(
        read_le_f32_2,
        from_le_bytes,
        unwrap_le_f32_2,
        DataTypes::IEEE754LSBSingle,
        f32
    );
    // Generates `read_le_f64_2` and `DataTypes::unwrap_le_f64_2`.
    generate_casting_extraction_functions!(
        read_le_f64_2,
        from_le_bytes,
        unwrap_le_f64_2,
        DataTypes::IEEE754LSBDouble,
        f64
    );

    // Stand-ins for bytes that would come from parsing the binary file.
    let mut single_bytes: &[u8] = &[172, 152, 111, 195];
    let mut double_bytes: &[u8] = &[172, 152, 111, 195, 117, 93, 133, 192];

    // Stand-in for looping over records of different types.
    for type_name in ["IEEE754LSBSingle", "IEEE754LSBDouble"] {
        if type_name == "IEEE754LSBSingle" {
            let wrapped = DataTypes::IEEE754LSBSingle(read_le_f32_2(&mut single_bytes));
            let single = wrapped.unwrap_le_f32_2();
            println!("Second Example\tIEEE754LSBSingle {:?}", single);
        } else if type_name == "IEEE754LSBDouble" {
            let wrapped = DataTypes::IEEE754LSBDouble(read_le_f64_2(&mut double_bytes));
            let double = wrapped.unwrap_le_f64_2();
            println!("Second Example\tIEEE754LSBDouble {:?}", double);
        } else {
            panic!();
        }
    }
}
/// Runs both walk-throughs in order; each prints one line per decoded value.
fn main() {
first_example();
second_example();
}
Upvotes: 0
Views: 319
Reputation: 15012
Here's what I would do.
First of all, you want two enums:
one that represents the data_type
string, which I will call DataKind, and one that holds the decoded value, which keeps the name DataTypes:
/// Identifies which data type a record holds; carries no payload.
/// Further variants are elided with `...etc`.
#[derive(Clone, Copy, Debug)]
enum DataKind {
/// Decoded from 4 bytes.
IEEE754LSBSingle,
/// Decoded from 8 bytes.
IEEE754LSBDouble,
...etc
}
/// A decoded value, with one variant per `DataKind` variant of the same name.
/// Further variants are elided with `...etc`.
#[derive(Debug)]
enum DataTypes {
/// Payload is an `f32` (4 bytes).
IEEE754LSBSingle(f32),
/// Payload is an `f64` (8 bytes).
IEEE754LSBDouble(f64),
...etc
}
Then, you want a function to interpret the necessary number of input bytes for the target type and store the result in the corresponding DataTypes
value:
impl DataKind {
    /// Decodes one value of this kind from the front of `input` and wraps it
    /// in the `DataTypes` variant of the same name.
    ///
    /// The decoded bytes are consumed: on return `*input` starts just past the
    /// value, so successive calls walk through a buffer.
    ///
    /// # Panics
    ///
    /// Panics if `input` holds fewer bytes than the target type needs.
    fn parse(self, input: &mut &[u8]) -> DataTypes {
        match self {
            DataKind::IEEE754LSBSingle => DataTypes::IEEE754LSBSingle({
                let (bytes, rest) = input.split_at(std::mem::size_of::<f32>());
                // Advance the cursor so the next parse starts after this value.
                *input = rest;
                f32::from_le_bytes(bytes.try_into().unwrap())
            }),
            DataKind::IEEE754LSBDouble => DataTypes::IEEE754LSBDouble({
                let (bytes, rest) = input.split_at(std::mem::size_of::<f64>());
                // Advance the cursor so the next parse starts after this value.
                *input = rest;
                f64::from_le_bytes(bytes.try_into().unwrap())
            }),
            ...etc
        }
    }
}
Thankfully, it's pretty easy to generate all of these at once with a macro:
/// Generates the `DataKind` and `DataTypes` enums plus `DataKind::parse` from
/// a single list of entries of the form `Name(rust_type => conversion_fn)`.
///
/// * `Name` becomes a unit variant of `DataKind` and a tuple variant of
///   `DataTypes` holding `rust_type`.
/// * `conversion_fn` is the byte-array constructor called on `rust_type`
///   (for example `from_le_bytes`).
///
/// A trailing comma after the last entry is accepted.
macro_rules! generate_datatypes_parsing {
    [$( $name:ident($target_type:ty => $conversion:ident) ),+ $(,)*] => {
        /// Identifies which data type a record holds; carries no payload.
        #[derive(Clone, Copy, Debug)]
        pub enum DataKind {
            $( $name, )*
        }

        /// A decoded value, with one variant per `DataKind` variant.
        #[derive(Debug)]
        pub enum DataTypes {
            $( $name($target_type), )*
        }

        impl DataKind {
            /// Decodes one value of this kind from the front of `input`.
            ///
            /// The decoded bytes are consumed: on return `*input` starts just
            /// past the value, so successive calls walk through a buffer.
            ///
            /// # Panics
            ///
            /// Panics if `input` holds fewer bytes than the target type needs.
            fn parse(self, input: &mut &[u8]) -> DataTypes {
                match self {
                    $(
                        DataKind::$name => DataTypes::$name({
                            let (bytes, rest) = input.split_at(
                                std::mem::size_of::<$target_type>()
                            );
                            // Advance the cursor; otherwise every call would
                            // decode the same leading bytes again.
                            *input = rest;
                            <$target_type>::$conversion(
                                bytes
                                    .try_into()
                                    .expect("split_at returned exactly size_of::<T>() bytes"),
                            )
                        }),
                    )*
                }
            }
        }
    };
}
// One entry per supported data type, written as
// `VariantName(rust_type => conversion_fn)`; further entries are elided with `...etc`.
generate_datatypes_parsing![
IEEE754LSBSingle(f32 => from_le_bytes),
IEEE754LSBDouble(f64 => from_le_bytes),
...etc
];
Then you can use DataKind::parse
like so:
/// Decodes one `f32` and one `f64` through `DataKind::parse` and prints each result.
fn main() {
    // Stand-ins for bytes that would come from parsing the binary file.
    let mut single_bytes: &[u8] = &[172, 152, 111, 195];
    let mut double_bytes: &[u8] = &[172, 152, 111, 195, 117, 93, 133, 192];

    // In real code these calls would sit inside the loop over records.
    let single = DataKind::IEEE754LSBSingle.parse(&mut single_bytes);
    println!("First Example\t{:?}", single);

    let double = DataKind::IEEE754LSBDouble.parse(&mut double_bytes);
    println!("First Example\t{:?}", double);
}
Why use an enum like DataKind?
It's best to use an enum like DataKind
, because this way you get more guarantees from the compiler. It's also far easier to pass around a Copy
enum with no lifetimes than a &str
with some lifetime you need to worry about.
Of course, you should #[derive(Deserialize)]
for DataKind
so serde can do that conversion from &str
for you.
Result
You may want to return a Result
from fn parse
with a custom error type. If you do, I'd recommend using a custom split_at
function that also returns a Result
if it goes out of bounds.
Upvotes: 1