use std::io;
use std::ops;
use byteorder::{ReadBytesExt, WriteBytesExt, BigEndian};
use {Result, Error, Reader, NextField};
/// A CSV reader paired with an index that allows seeking to a record by its
/// position.
///
/// `R` is the type of the underlying CSV data and `I` is the type of the
/// index data.
pub struct Indexed<R, I> {
/// The wrapped CSV reader; also exposed through `Deref`/`DerefMut`.
rdr: Reader<R>,
/// The index data, read as a sequence of big-endian `u64` values.
idx: I,
/// Number of seekable records (the header row is excluded when the reader
/// has headers enabled).
count: u64,
}
/// Lets an `Indexed` be used wherever a shared reference to the wrapped
/// `Reader` is expected.
impl<R, I> ops::Deref for Indexed<R, I> {
    type Target = Reader<R>;

    /// Borrows the wrapped CSV reader.
    fn deref(&self) -> &Self::Target {
        let reader = &self.rdr;
        reader
    }
}
impl<R, I> ops::DerefMut for Indexed<R, I> {
fn deref_mut(&mut self) -> &mut Reader<R> { &mut self.rdr }
}
impl<R, I> Indexed<R, I>
where
    R: io::Read + io::Seek,
    I: io::Read + io::Seek,
{
    /// Opens an indexed reader from a CSV reader and its index data.
    ///
    /// The record count is read from the last 8 bytes of `idx` as a
    /// big-endian `u64`. When `rdr` has headers enabled and the stored count
    /// is non-zero, the count is reduced by one and the headers are read from
    /// `rdr` immediately.
    ///
    /// # Errors
    ///
    /// Returns an error if seeking or reading `idx` fails (for example when
    /// it holds fewer than 8 bytes) or if reading the headers fails.
    pub fn open(mut rdr: Reader<R>, mut idx: I) -> Result<Indexed<R, I>> {
        // The total number of records lives in the trailing 8 bytes.
        try!(idx.seek(io::SeekFrom::End(-8)));
        let mut count = try!(idx.read_u64::<BigEndian>());
        if rdr.has_headers && count > 0 {
            // The header row is not addressable as a record.
            count -= 1;
            let _ = try!(rdr.byte_headers());
        }
        Ok(Indexed {
            rdr: rdr,
            idx: idx,
            count: count,
        })
    }

    /// Positions the underlying CSV reader at the start of record `i`
    /// (zero-based; the header row is skipped when headers are enabled).
    ///
    /// # Errors
    ///
    /// Returns `Error::Index` if `i` is not less than `count()`, or if the
    /// byte position of the entry cannot be represented as a `u64` (only
    /// possible with a corrupt index). Errors from seeking or reading the
    /// index, or from seeking the CSV reader, are propagated.
    pub fn seek(&mut self, i: u64) -> Result<()> {
        if i >= self.count {
            return Err(Error::Index(format!(
                "Record index {} is out of bounds. (There are {} records.)",
                i, self.count)));
        }
        // With headers enabled, entry 0 of the index is the header row, so
        // record `i` lives one entry further on. `i < self.count` guarantees
        // that the increment cannot overflow.
        let slot = if self.rdr.has_headers { i + 1 } else { i };
        // Each index entry is 8 bytes wide. The record count originates from
        // the index data itself, so guard the multiplication instead of
        // letting a corrupt count wrap around to an unrelated entry.
        let pos = match slot.checked_mul(8) {
            Some(pos) => pos,
            None => {
                return Err(Error::Index(format!(
                    "Record index {} is too large to be addressed in the index.",
                    i)));
            }
        };
        try!(self.idx.seek(io::SeekFrom::Start(pos)));
        let offset = try!(self.idx.read_u64::<BigEndian>());
        self.rdr.seek(offset)
    }

    /// Returns the number of records that can be passed to `seek`.
    pub fn count(&self) -> u64 {
        self.count
    }
}
/// Builds an index for the CSV data in `rdr` and writes it to `wtr`.
///
/// The reader is first rewound to offset 0. For every pass over the data the
/// reader's current byte offset is written as a big-endian `u64`, and the
/// fields are then consumed until the record (or the CSV data) ends. After
/// the data is exhausted, the number of completed records is written as a
/// final big-endian `u64`.
///
/// Note that an offset is written before it is known whether a record
/// follows, so the index may contain more offsets than counted records.
///
/// # Errors
///
/// Returns any error produced while seeking or parsing `rdr`, or while
/// writing to `wtr`.
pub fn create_index<R, W>(mut rdr: Reader<R>, mut wtr: W) -> Result<()>
where
    R: io::Read + io::Seek,
    W: io::Write,
{
    // Start from the very beginning so every row gets an entry.
    try!(rdr.seek(0));

    let mut records: u64 = 0;
    'rows: loop {
        if rdr.done() {
            break 'rows;
        }

        let start = rdr.byte_offset();
        try!(wtr.write_u64::<BigEndian>(start));

        // Skip over the fields; only a properly terminated record is counted.
        'fields: loop {
            match rdr.next_bytes() {
                NextField::Data(_) => continue 'fields,
                NextField::Error(err) => return Err(err),
                NextField::EndOfRecord => {
                    records += 1;
                    break 'fields;
                }
                NextField::EndOfCsv => break 'fields,
            }
        }
    }

    try!(wtr.write_u64::<BigEndian>(records));
    Ok(())
}
// Unit tests: build an index for small in-memory CSV inputs, open it with
// `Indexed`, and check the record count and seeking to individual records.
#[cfg(test)]
mod tests {
use std::io::{self, Write};
use Reader;
// CSV reader over an in-memory byte buffer.
type CsvReader = Reader<io::Cursor<Vec<u8>>>;
// In-memory byte buffer; used here for both the CSV data and the index.
type Bytes = io::Cursor<Vec<u8>>;
type Indexed = super::Indexed<Bytes, Bytes>;
// Indexes `s` and opens it, leaving both readers at their default
// configuration (the closures pass the reader through untouched).
fn index<S: Into<String>>(s: S) -> Indexed {
index_with(s, |rdr| rdr, |rdr| rdr)
}
// Like `index`, but calls `has_headers(false)` on both the reader used to
// create the index and the reader used to open it.
fn index_nh<S: Into<String>>(s: S) -> Indexed {
let then = |rdr: CsvReader| rdr.has_headers(false);
index_with(s, &then, &then)
}
// Creates an index for `s` in memory and opens it.
//
// `create` configures the reader handed to `create_index`; `new` configures
// the reader handed to `Indexed::open`. Panics if either step fails.
fn index_with<S, F, G>(s: S, create: F, new: G) -> Indexed
where S: Into<String>,
F: FnOnce(CsvReader) -> CsvReader,
G: FnOnce(CsvReader) -> CsvReader {
let data = s.into();
let mut idx_bytes = io::Cursor::new(vec![]);
// `by_ref` lends the cursor to `create_index` so it can be reused below.
super::create_index(create(Reader::from_string(&*data)),
idx_bytes.by_ref()).unwrap();
super::Indexed::open(new(Reader::from_string(data)),
idx_bytes).unwrap()
}
// Reads the next record from the current position; panics if there is no
// record or reading it fails.
fn next(idx: &mut Indexed) -> Vec<String> {
idx.records().next().unwrap().unwrap()
}
// Seeks to record `i` and reads it; panics if the seek fails.
fn nth(idx: &mut Indexed, i: u64) -> Vec<String> {
idx.seek(i).unwrap();
next(idx)
}
// Default reader settings, single column: the first row is not counted and
// record 0 is the row after it.
#[test]
fn headers_one_field() {
let data = "\
h1
a
b
c
";
let mut idx = index(data);
assert_eq!(idx.count(), 3);
assert_eq!(nth(&mut idx, 0), vec!["a"]);
assert_eq!(nth(&mut idx, 1), vec!["b"]);
assert_eq!(nth(&mut idx, 2), vec!["c"]);
}
// Default reader settings, three columns per row.
#[test]
fn headers_many_fields() {
let data = "\
h1,h2,h3
a,b,c
d,e,f
g,h,i
";
let mut idx = index(data);
assert_eq!(idx.count(), 3);
assert_eq!(nth(&mut idx, 0), vec!["a", "b", "c"]);
assert_eq!(nth(&mut idx, 1), vec!["d", "e", "f"]);
assert_eq!(nth(&mut idx, 2), vec!["g", "h", "i"]);
}
// Headers disabled on both readers: every row, including the first, is a
// seekable record.
#[test]
fn no_headers_one_field() {
let data = "\
h1
a
b
c
";
let mut idx = index_nh(data);
assert_eq!(idx.count(), 4);
assert_eq!(nth(&mut idx, 0), vec!["h1"]);
assert_eq!(nth(&mut idx, 1), vec!["a"]);
assert_eq!(nth(&mut idx, 2), vec!["b"]);
assert_eq!(nth(&mut idx, 3), vec!["c"]);
}
// Headers disabled on both readers, three columns per row.
#[test]
fn no_headers_many_fields() {
let data = "\
h1,h2,h3
a,b,c
d,e,f
g,h,i
";
let mut idx = index_nh(data);
assert_eq!(idx.count(), 4);
assert_eq!(nth(&mut idx, 0), vec!["h1", "h2", "h3"]);
assert_eq!(nth(&mut idx, 1), vec!["a", "b", "c"]);
assert_eq!(nth(&mut idx, 2), vec!["d", "e", "f"]);
assert_eq!(nth(&mut idx, 3), vec!["g", "h", "i"]);
}
// Index created with headers disabled but opened with the default reader:
// the result matches `headers_one_field`.
#[test]
fn switch_headers_one_field1() {
let data = "\
h1
a
b
c
";
let mut idx = index_with(data, |r| r.has_headers(false), |r| r);
assert_eq!(idx.count(), 3);
assert_eq!(nth(&mut idx, 0), vec!["a"]);
assert_eq!(nth(&mut idx, 1), vec!["b"]);
assert_eq!(nth(&mut idx, 2), vec!["c"]);
}
// Index created with the default reader but opened with headers disabled:
// the result matches `no_headers_one_field`.
#[test]
fn switch_headers_one_field2() {
let data = "\
h1
a
b
c
";
let mut idx = index_with(data, |r| r, |r| r.has_headers(false));
assert_eq!(idx.count(), 4);
assert_eq!(nth(&mut idx, 0), vec!["h1"]);
assert_eq!(nth(&mut idx, 1), vec!["a"]);
assert_eq!(nth(&mut idx, 2), vec!["b"]);
assert_eq!(nth(&mut idx, 3), vec!["c"]);
}
// Same as `headers_one_field`, except the data begins with a newline (the
// literal below has no line-continuation backslash). The expected count and
// records are unchanged.
#[test]
fn headers_one_field_newlines() {
let data = "
h1
a
b
c
";
let mut idx = index(data);
assert_eq!(idx.count(), 3);
assert_eq!(nth(&mut idx, 0), vec!["a"]);
assert_eq!(nth(&mut idx, 1), vec!["b"]);
assert_eq!(nth(&mut idx, 2), vec!["c"]);
}
}