kostas
kostas

Reputation: 2019

Reading data from CSV file into an ndarray in Rust returns ShapeError

Attempting to read a CSV file into an ndarray returns an error instead of the array. The code is listed below:

use ndarray::prelude::*;
use ndarray::{NdIndex, RemoveAxis, OwnedRepr, Array2, ShapeError};

/// Reads the file at `filename`, splits every line on `sep`, parses each field
/// as `f64`, and then tries to build a two-dimensional array from the rows.
/// The first line is skipped when `header` is `true`.
///
/// Panics (through `unwrap`) if the file cannot be opened, a line cannot be
/// read, or a field does not parse as `f64`; also panics on `vector[0]` when
/// no data rows were read.
fn read_file(filename:&str, sep:&str, header:bool) -> Result<ArrayBase<OwnedRepr<Vec<f64>>, Dim<[usize; 2]>>, ShapeError>{
    
    let path = Path::new(filename);
    let input = File::open(path).unwrap();
    let reader = BufReader::new(input);
    // One inner Vec per parsed line, i.e. a vector of rows rather than a flat buffer.
    let mut vector:Vec<Vec<f64>> = Vec::new();

    // Number of leading lines to drop: 1 when a header row is present, else 0.
    let skip = if header {1} else {0};
    for line in reader.lines().skip(skip) {
        // Split the line into owned field strings, then parse each one as f64.
        let data:Vec<String> = line.unwrap().split(sep).into_iter().map(|x| {x.to_string()}).collect();
        let data = data.into_iter().map(|x| x.parse().unwrap()).collect::<Vec<f64>>();
        println!("{:?}", data);
        // Appends the whole row as a single element of `vector`.
        vector.extend_from_slice(&[data]);
    }


    // Prints the row count and the field count of the first row.
    println!("{}, {}", vector.len(), vector[0].len());
    // NOTE(review): the requested shape is (rows, columns), but `vector` holds
    // only `rows` elements (each element is an entire row), so the element
    // count and the shape disagree whenever there is more than one column.
    Array2::from_shape_vec((vector.len(), vector[0].len()), vector)
    
}

For the sake of completeness and reproducibility, the contents of the CSV file are shown below:

CD4,CD8b,CD3,CD8
199,420,132,226
294,311,241,164
85,79,14,218
19,1,141,130

Calling read_file(filename, ",", true) returns Err(ShapeError/OutOfBounds: out of bounds indexing)

Upvotes: 1

Views: 43

Answers (1)

Finn Bear
Finn Bear

Reputation: 1810

Here is a corrected, albeit non-optimized, version. It takes into account that Array2::from_shape_vec wants a flat input vector:

use ndarray::prelude::*;
use ndarray::{Array2, OwnedRepr, ShapeError};
use std::io::{BufRead, BufReader, Cursor};

fn read_file(
    input: Cursor<Vec<u8>>,
    sep: &str,
    header: bool,
) -> Result<ArrayBase<OwnedRepr<f64>, Dim<[usize; 2]>>, ShapeError> {
    let reader = BufReader::new(input);
    let mut vector: Vec<f64> = Vec::new();

    let skip = if header { 1 } else { 0 };
    let mut columns = 0;
    for line in reader.lines().skip(skip) {
        let data: Vec<String> = line
            .unwrap()
            .split(sep)
            .into_iter()
            .map(|x| x.to_string())
            .collect();
        let data = data
            .into_iter()
            .map(|x| x.parse().unwrap())
            .collect::<Vec<f64>>();
        println!("{:?}", data);
        vector.extend_from_slice(&data);
        columns = data.len();
    }

    println!("{}, {}", vector.len() / columns, columns);
    Array2::from_shape_vec((vector.len() / columns, columns), vector)
}

/// Demo entry point: feeds an in-memory CSV fixture (one header line followed
/// by four data rows) to `read_file` and prints the resulting array with its
/// `Debug` formatting.
pub fn main() {
    // Inline fixture so the example runs without touching the file system.
    let csv_text = r#"CD4,CD8b,CD3,CD8
199,420,132,226
294,311,241,164
85,79,14,218
19,1,141,130"#;

    // `read_file` takes an owned byte buffer wrapped in a `Cursor`.
    let bytes: Vec<u8> = csv_text.as_bytes().to_vec();
    let source = Cursor::new(bytes);
    let array = read_file(source, ",", true).unwrap();

    println!("{array:?}");
}

Note that the return type changed: the extraneous Vec was removed, so the array's element type is now f64 rather than Vec<f64>.

I changed the file I/O to an in-memory data structure to make the answer self-contained. You could optimize this further by passing the parsing iterator directly to extend, instead of collecting each row into a Vec and calling extend_from_slice on it.

Upvotes: 1

Related Questions