Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to set header row #453

Merged
merged 15 commits into from
Oct 8, 2024
Merged
45 changes: 43 additions & 2 deletions src/auto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

use crate::errors::Error;
use crate::vba::VbaProject;
use crate::xlsb::XlsbOptions;
use crate::{
open_workbook, open_workbook_from_rs, Data, DataRef, Metadata, Ods, Range, Reader, ReaderRef,
Xls, Xlsb, Xlsx,
open_workbook, open_workbook_from_rs, Data, DataRef, Metadata, Ods, OdsOptions, Range, Reader,
ReaderOptions, ReaderRef, Xls, XlsOptions, Xlsb, Xlsx, XlsxOptions,
};
use std::borrow::Cow;
use std::fs::File;
Expand Down Expand Up @@ -74,17 +75,57 @@ where
}
}

/// Reader options for `Sheets`: one variant per supported file format, each wrapping
/// that format's own options type.
pub enum AutoReaderOptions {
/// Options for an `Xls` reader
Xls(XlsOptions),
/// Options for an `Xlsx` reader
Xlsx(XlsxOptions),
/// Options for an `Xlsb` reader
Xlsb(XlsbOptions),
/// Options for an `Ods` reader
Ods(OdsOptions),
}

impl ReaderOptions for AutoReaderOptions {
    /// Forwards `header_row` to the wrapped format-specific options.
    ///
    /// The variant is preserved; only the inner options value is replaced by the
    /// result of its own `with_header_row`.
    fn with_header_row(self, header_row: u32) -> Self {
        match self {
            Self::Xls(inner) => Self::Xls(inner.with_header_row(header_row)),
            Self::Xlsx(inner) => Self::Xlsx(inner.with_header_row(header_row)),
            Self::Xlsb(inner) => Self::Xlsb(inner.with_header_row(header_row)),
            Self::Ods(inner) => Self::Ods(inner.with_header_row(header_row)),
        }
    }
}

impl<RS> Reader<RS> for Sheets<RS>
where
RS: std::io::Read + std::io::Seek,
{
type Error = Error;
type Options = AutoReaderOptions;

/// Always fails: this implementation never builds a `Sheets` from a bare reader.
///
/// The `_reader` argument is ignored and `Error::Msg` is returned unconditionally.
/// NOTE(review): presumably callers are expected to use the `open_workbook_auto*`
/// functions instead — confirm.
fn new(_reader: RS) -> Result<Self, Self::Error> {
Err(Error::Msg("Sheets must be created from a Path"))
}

/// Applies `options` to the wrapped format-specific reader.
///
/// # Panics
///
/// Panics if the `options` variant does not belong to the same file format as the
/// wrapped reader (for example `AutoReaderOptions::Xlsx` on a `Sheets::Xls`).
fn set_options(&mut self, options: Self::Options) {
    match (self, options) {
        (Sheets::Xls(reader), AutoReaderOptions::Xls(opts)) => reader.set_options(opts),
        (Sheets::Xlsx(reader), AutoReaderOptions::Xlsx(opts)) => reader.set_options(opts),
        (Sheets::Xlsb(reader), AutoReaderOptions::Xlsb(opts)) => reader.set_options(opts),
        (Sheets::Ods(reader), AutoReaderOptions::Ods(opts)) => reader.set_options(opts),
        // A caller can get here simply by passing options for another format, so
        // report that instead of claiming the code is unreachable. Every `Sheets`
        // variant is spelled out (no bare `_` on the left) so that adding a new
        // format remains a compile error here.
        (Sheets::Xls(_), _)
        | (Sheets::Xlsx(_), _)
        | (Sheets::Xlsb(_), _)
        | (Sheets::Ods(_), _) => {
            panic!("AutoReaderOptions variant does not match the file format of this Sheets reader")
        }
    }
}
PrettyWood marked this conversation as resolved.
Show resolved Hide resolved

/// Gets `VbaProject`
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>> {
match *self {
Expand Down
24 changes: 21 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ pub use crate::auto::{open_workbook_auto, open_workbook_auto_from_rs, Sheets};
pub use crate::datatype::{Data, DataRef, DataType, ExcelDateTime, ExcelDateTimeType};
pub use crate::de::{DeError, RangeDeserializer, RangeDeserializerBuilder, ToCellDeserializer};
pub use crate::errors::Error;
pub use crate::ods::{Ods, OdsError};
pub use crate::ods::{Ods, OdsError, OdsOptions};
pub use crate::xls::{Xls, XlsError, XlsOptions};
pub use crate::xlsb::{Xlsb, XlsbError};
pub use crate::xlsx::{Xlsx, XlsxError};
pub use crate::xlsb::{Xlsb, XlsbError, XlsbOptions};
pub use crate::xlsx::{Xlsx, XlsxError, XlsxOptions};

use crate::vba::VbaProject;

Expand Down Expand Up @@ -215,6 +215,12 @@ pub struct Sheet {
pub visible: SheetVisible,
}

/// A trait to share reader options across different `FileType`s
///
/// It is the bound required of `Reader::Options`.
pub trait ReaderOptions: Sized {
    /// Returns these options with the header row set to `header_row`.
    ///
    /// Takes and returns the options by value, so calls can be chained.
    fn with_header_row(self, header_row: u32) -> Self;
}

// FIXME `Reader` only needs to be `Seek` for `Xls::xls`. Because of the present API this limits
// the kinds of readers that data in (other) formats can be read from.
/// A trait to share spreadsheets reader functions across different `FileType`s
Expand All @@ -225,9 +231,21 @@ where
/// Error specific to file type
type Error: std::fmt::Debug + From<std::io::Error>;

/// Options specific to file type
type Options: ReaderOptions;

/// Creates a new instance.
fn new(reader: RS) -> Result<Self, Self::Error>;

/// Set the reader options, taking `options` by value
fn set_options(&mut self, options: Self::Options);

/// Builder-style counterpart of `set_options`: applies `options`, then hands the reader back
fn with_options(self, options: Self::Options) -> Self {
    let mut reader = self;
    reader.set_options(options);
    reader
}

/// Gets `VbaProject`
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>>;

Expand Down
48 changes: 44 additions & 4 deletions src/ods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ use zip::read::{ZipArchive, ZipFile};
use zip::result::ZipError;

use crate::vba::VbaProject;
use crate::{Data, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible};
use crate::{
Data, DataType, Metadata, Range, Reader, ReaderOptions, Sheet, SheetType, SheetVisible,
};
use std::marker::PhantomData;

const MIMETYPE: &[u8] = b"application/vnd.oasis.opendocument.spreadsheet";
Expand Down Expand Up @@ -62,6 +64,23 @@ pub enum OdsError {
WorksheetNotFound(String),
}

/// Ods reader options
///
/// `Default` leaves every option unset.
#[derive(Debug, Default)]
pub struct OdsOptions {
/// Index of the header row.
///
/// If not set, the first non-empty row is considered the header row.
/// When set, `worksheet_range` passes it as the start row when slicing the sheet range.
pub header_row: Option<u32>,
}

impl ReaderOptions for OdsOptions {
    /// Returns the options with `header_row` stored as the header row index
    fn with_header_row(mut self, header_row: u32) -> Self {
        self.header_row = Some(header_row);
        self
    }
}

from_err!(std::io::Error, OdsError, Io);
from_err!(zip::result::ZipError, OdsError, Zip);
from_err!(quick_xml::Error, OdsError, Xml);
Expand Down Expand Up @@ -116,13 +135,16 @@ pub struct Ods<RS> {
marker: PhantomData<RS>,
#[cfg(feature = "picture")]
pictures: Option<Vec<(String, Vec<u8>)>>,
/// Reader options
options: OdsOptions,
}

impl<RS> Reader<RS> for Ods<RS>
where
RS: Read + Seek,
{
type Error = OdsError;
type Options = OdsOptions;

fn new(reader: RS) -> Result<Self, OdsError> {
let mut zip = ZipArchive::new(reader)?;
Expand Down Expand Up @@ -161,9 +183,15 @@ where
sheets,
#[cfg(feature = "picture")]
pictures,
options: OdsOptions::default(),
})
}

/// Set options
///
/// Overwrites the stored options as a whole; nothing from the previous value is kept.
fn set_options(&mut self, options: Self::Options) {
self.options = options;
}

/// Gets `VbaProject`
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, OdsError>> {
None
Expand All @@ -176,10 +204,22 @@ where

/// Read worksheet data in corresponding worksheet path
///
/// Returns `OdsError::WorksheetNotFound` when no sheet is stored under `name`.
/// When `options.header_row` is set and the sheet range reports both a start and an
/// end, the result of `sheet.range((header_row, start.1), end)` is returned instead of
/// the stored range.
fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, OdsError> {
self.sheets
let sheet = self
.sheets
.get(name)
.ok_or_else(|| OdsError::WorksheetNotFound(name.into()))
.map(|r| r.0.to_owned())
.ok_or_else(|| OdsError::WorksheetNotFound(name.into()))?
.0
.to_owned();

// If a header_row is defined, re-slice the range so it starts at that row while
// keeping the original start column and end cell.
// NOTE(review): `header_row` is passed on unchecked — confirm how `Range::range`
// behaves when it is greater than `end.0`.
if let Some(header_row) = self.options.header_row {
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
return Ok(sheet.range((header_row, start.1), end));
}
}

// No header row is set, or the range has no start/end: return the range unchanged
Ok(sheet)
}

fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
Expand Down
47 changes: 44 additions & 3 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use crate::utils::read_usize;
use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
use crate::vba::VbaProject;
use crate::{
Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible,
Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, ReaderOptions, Sheet,
SheetType, SheetVisible,
};

#[derive(Debug)]
Expand Down Expand Up @@ -136,6 +137,30 @@ pub struct XlsOptions {
///
/// [code page]: https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
pub force_codepage: Option<u16>,
/// Index of the header row
/// If not set, the first non-empty row is considered the header row
pub header_row: Option<u32>,
}

impl XlsOptions {
    /// Returns the options with the forced code page set to `codepage`.
    ///
    /// All other fields are carried over unchanged.
    #[allow(dead_code)] // presumably not called inside the crate yet — TODO confirm
    fn with_codepage(mut self, codepage: u16) -> Self {
        self.force_codepage = Some(codepage);
        self
    }
}

impl ReaderOptions for XlsOptions {
    /// Returns the options with `header_row` stored as the header row index.
    ///
    /// All other fields are carried over unchanged.
    fn with_header_row(mut self, header_row: u32) -> Self {
        self.header_row = Some(header_row);
        self
    }
}

struct SheetData {
Expand Down Expand Up @@ -226,11 +251,16 @@ impl<RS: Read + Seek> Xls<RS> {

impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
type Error = XlsError;
type Options = XlsOptions;

/// Creates a reader with default options by delegating to `new_with_options`
fn new(reader: RS) -> Result<Self, XlsError> {
Self::new_with_options(reader, XlsOptions::default())
}

/// Set options
///
/// Overwrites the stored options as a whole, including `force_codepage`.
/// NOTE(review): whether changing `force_codepage` after construction affects sheets
/// that are already loaded is not visible here — confirm.
fn set_options(&mut self, options: Self::Options) {
self.options = options;
}

/// Gets `VbaProject`
///
/// Returns a borrowed view of the stored project, or `None` when `self.vba` is `None`.
/// This implementation never produces an `Err`.
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, XlsError>> {
self.vba.as_ref().map(|vba| Ok(Cow::Borrowed(vba)))
}
Expand All @@ -241,10 +271,21 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
}

/// Read worksheet data for the sheet stored under `name`
///
/// Returns `XlsError::WorksheetNotFound` when no such sheet exists.
/// When `options.header_row` is set and the sheet range reports both a start and an
/// end, the result of `sheet.range((header_row, start.1), end)` is returned instead of
/// the stored range.
fn worksheet_range(&mut self, name: &str) -> Result<Range<Data>, XlsError> {
self.sheets
let sheet = self
.sheets
.get(name)
.map(|r| r.range.clone())
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))
.ok_or_else(|| XlsError::WorksheetNotFound(name.into()))?;

// If a header_row is defined, re-slice the range so it starts at that row while
// keeping the original start column and end cell.
// NOTE(review): `header_row` is passed on unchecked — confirm how `Range::range`
// behaves when it is greater than `end.0`.
if let Some(header_row) = self.options.header_row {
if let (Some(start), Some(end)) = (sheet.start(), sheet.end()) {
return Ok(sheet.range((header_row, start.1), end));
}
}

// No header row is set, or the range has no start/end: return the range unchanged
Ok(sheet)
}

fn worksheets(&mut self) -> Vec<(String, Range<Data>)> {
Expand Down
Loading