Reputation: 2019
Attempting to read a CSV file and return its contents as an ndarray results in an error. The code listing is:
use ndarray::prelude::*;
use ndarray::{NdIndex, RemoveAxis, OwnedRepr, Array2, ShapeError};
/// Reads a `sep`-delimited text file of numbers and tries to build a 2-D ndarray from it.
///
/// `filename` is the path of the file to open, `sep` is the field separator, and
/// `header == true` skips the first line.
///
/// Panics (through `unwrap` and indexing) if the file cannot be opened, a line cannot be
/// read, a field does not parse as `f64`, or no data row was read (`vector[0]`).
fn read_file(filename:&str, sep:&str, header:bool) -> Result<ArrayBase<OwnedRepr<Vec<f64>>, Dim<[usize; 2]>>, ShapeError>{
let path = Path::new(filename);
let input = File::open(path).unwrap();
let reader = BufReader::new(input);
// One inner Vec<f64> per data row.
let mut vector:Vec<Vec<f64>> = Vec::new();
// Number of leading lines to ignore (the header line, if any).
let skip = if header {1} else {0};
for line in reader.lines().skip(skip) {
// Split the line into owned field strings, then parse each field as f64.
let data:Vec<String> = line.unwrap().split(sep).into_iter().map(|x| {x.to_string()}).collect();
let data = data.into_iter().map(|x| x.parse().unwrap()).collect::<Vec<f64>>();
println!("{:?}", data);
// Appends the parsed row as a single element of `vector`.
vector.extend_from_slice(&[data]);
}
println!("{}, {}", vector.len(), vector[0].len());
// NOTE: `vector` has one element per row, but the requested shape (rows, columns) asks
// for rows * columns elements; this mismatch is what produces the reported ShapeError.
Array2::from_shape_vec((vector.len(), vector[0].len()), vector)
}
For the sake of completeness and reproducibility, the contents of the CSV file are shown below:
CD4 | CD8b | CD3 | CD8 |
---|---|---|---|
199 | 420 | 132 | 226 |
294 | 311 | 241 | 164 |
85 | 79 | 14 | 218 |
19 | 1 | 141 | 130 |
Calling read_file(filename, ",", true)
returns Err(ShapeError/OutOfBounds: out of bounds indexing)
Upvotes: 1
Views: 43
Reputation: 1810
Here is a corrected albeit non-optimized version, taking into account that Array2::from_shape_vec
wants a flat input vector:
use ndarray::prelude::*;
use ndarray::{Array2, OwnedRepr, ShapeError};
use std::io::{BufRead, BufReader, Cursor};
fn read_file(
input: Cursor<Vec<u8>>,
sep: &str,
header: bool,
) -> Result<ArrayBase<OwnedRepr<f64>, Dim<[usize; 2]>>, ShapeError> {
let reader = BufReader::new(input);
let mut vector: Vec<f64> = Vec::new();
let skip = if header { 1 } else { 0 };
let mut columns = 0;
for line in reader.lines().skip(skip) {
let data: Vec<String> = line
.unwrap()
.split(sep)
.into_iter()
.map(|x| x.to_string())
.collect();
let data = data
.into_iter()
.map(|x| x.parse().unwrap())
.collect::<Vec<f64>>();
println!("{:?}", data);
vector.extend_from_slice(&data);
columns = data.len();
}
println!("{}, {}", vector.len() / columns, columns);
Array2::from_shape_vec((vector.len() / columns, columns), vector)
}
/// Feeds an in-memory CSV sample through `read_file` and prints the resulting array.
pub fn main() {
    // The sample is kept inline so the example runs without a file on disk.
    let csv_text = r#"CD4,CD8b,CD3,CD8
199,420,132,226
294,311,241,164
85,79,14,218
19,1,141,130"#;
    let bytes: Vec<u8> = csv_text.as_bytes().to_vec();
    let cursor = Cursor::new(bytes);
    let parsed = read_file(cursor, ",", true).unwrap();
    println!("{:?}", parsed);
}
Note that the return type changed (the extraneous `Vec` was removed).
I changed the file I/O to an in-memory data structure to make the answer self-contained. You could optimize this by using `extend` instead of `extend_from_slice` on a collected `Vec`.
Upvotes: 1