diff --git a/python/fastexcel/__init__.py b/python/fastexcel/__init__.py
index 8717e1b..60415cd 100644
--- a/python/fastexcel/__init__.py
+++ b/python/fastexcel/__init__.py
@@ -131,7 +131,7 @@ def load_sheet(
         use_columns: list[str] | list[int] | str | None = None,
         dtypes: DTypeMap | None = None,
     ) -> ExcelSheet:
-        """Loads a sheet by index or name.
+        """Loads a sheet lazily by index or name.
 
         :param idx_or_name: The index (starting at 0) or the name of the sheet to load.
         :param header_row: The index of the row containing the column labels, default index is 0.
@@ -165,9 +165,41 @@ def load_sheet(
                 schema_sample_rows=schema_sample_rows,
                 use_columns=use_columns,
                 dtypes=dtypes,
+                eager=False,
             )
         )
 
+    def load_sheet_eager(
+        self,
+        idx_or_name: int | str,
+        *,
+        header_row: int | None = 0,
+        column_names: list[str] | None = None,
+        skip_rows: int = 0,
+        n_rows: int | None = None,
+        schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | str | None = None,
+        dtypes: DTypeMap | None = None,
+    ) -> pa.RecordBatch:
+        """Loads a sheet eagerly by index or name and returns it as a `pa.RecordBatch`.
+
+        For xlsx files, this will be faster and more memory-efficient, as it will use
+        `worksheet_range_ref` under the hood, which returns borrowed types. For other file
+        types, the sheet is loaded like `load_sheet` does and then converted to arrow.
+        Refer to `load_sheet` for parameter documentation
+        """
+        return self._reader.load_sheet(
+            idx_or_name=idx_or_name,
+            header_row=header_row,
+            column_names=column_names,
+            skip_rows=skip_rows,
+            n_rows=n_rows,
+            schema_sample_rows=schema_sample_rows,
+            use_columns=use_columns,
+            dtypes=dtypes,
+            eager=True,
+        )
+
     def load_sheet_by_name(
         self,
         name: str,
@@ -184,17 +216,15 @@ def load_sheet_by_name(
 
         Refer to `load_sheet` for parameter documentation
         """
-        return ExcelSheet(
-            self._reader.load_sheet(
-                name,
-                header_row=header_row,
-                column_names=column_names,
-                skip_rows=skip_rows,
-                n_rows=n_rows,
-                schema_sample_rows=schema_sample_rows,
-                use_columns=use_columns,
-                dtypes=dtypes,
-            )
+        return self.load_sheet(
+            name,
+            header_row=header_row,
+            column_names=column_names,
+            skip_rows=skip_rows,
+            n_rows=n_rows,
+            schema_sample_rows=schema_sample_rows,
+            use_columns=use_columns,
+            dtypes=dtypes,
         )
 
     def load_sheet_by_idx(
@@ -213,17 +243,15 @@ def load_sheet_by_idx(
 
         Refer to `load_sheet` for parameter documentation
         """
-        return ExcelSheet(
-            self._reader.load_sheet(
-                idx,
-                header_row=header_row,
-                column_names=column_names,
-                skip_rows=skip_rows,
-                n_rows=n_rows,
-                schema_sample_rows=schema_sample_rows,
-                use_columns=use_columns,
-                dtypes=dtypes,
-            )
+        return self.load_sheet(
+            idx,
+            header_row=header_row,
+            column_names=column_names,
+            skip_rows=skip_rows,
+            n_rows=n_rows,
+            schema_sample_rows=schema_sample_rows,
+            use_columns=use_columns,
+            dtypes=dtypes,
         )
 
     def __repr__(self) -> str:
diff --git a/python/fastexcel/_fastexcel.pyi b/python/fastexcel/_fastexcel.pyi
index 0d6d3e1..59e892e 100644
--- a/python/fastexcel/_fastexcel.pyi
+++ b/python/fastexcel/_fastexcel.pyi
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import typing
 from typing import Literal
 
 import pyarrow as pa
@@ -61,6 +62,7 @@ class _ExcelSheet:
 class _ExcelReader:
     """A class representing an open Excel file and allowing to read its sheets"""
 
+    @typing.overload
     def load_sheet(
         self,
         idx_or_name: str | int,
@@ -72,7 +74,22 @@ class _ExcelReader:
         schema_sample_rows: int | None = 1_000,
         use_columns: list[str] | list[int] | str | None = None,
         dtypes: DTypeMap | None = None,
+        eager: Literal[False] = ...,
     ) -> _ExcelSheet: ...
+    @typing.overload
+    def load_sheet(
+        self,
+        idx_or_name: str | int,
+        *,
+        header_row: int | None = 0,
+        column_names: list[str] | None = None,
+        skip_rows: int = 0,
+        n_rows: int | None = None,
+        schema_sample_rows: int | None = 1_000,
+        use_columns: list[str] | list[int] | str | None = None,
+        dtypes: DTypeMap | None = None,
+        eager: Literal[True] = ...,
+    ) -> pa.RecordBatch: ...
     @property
     def sheet_names(self) -> list[str]: ...
 
diff --git a/python/tests/test_eagerness.py b/python/tests/test_eagerness.py
new file mode 100644
index 0000000..80baba9
--- /dev/null
+++ b/python/tests/test_eagerness.py
@@ -0,0 +1,54 @@
+from datetime import date, datetime, timedelta
+
+import fastexcel
+import polars as pl
+from pandas.testing import assert_frame_equal as pd_assert_frame_equal
+from polars.testing import assert_frame_equal as pl_assert_frame_equal
+from pyarrow import RecordBatch
+from utils import path_for_fixture
+
+
+def test_load_sheet_eager_single_sheet() -> None:
+    excel_reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))
+
+    eager_pandas = excel_reader.load_sheet_eager(0).to_pandas()
+    lazy_pandas = excel_reader.load_sheet(0).to_pandas()
+    pd_assert_frame_equal(eager_pandas, lazy_pandas)
+
+    eager_polars = pl.from_arrow(data=excel_reader.load_sheet_eager(0))
+    assert isinstance(eager_polars, pl.DataFrame)
+    lazy_polars = excel_reader.load_sheet(0).to_polars()
+    pl_assert_frame_equal(eager_polars, lazy_polars)
+
+
+def test_multiple_sheets_with_unnamed_columns() -> None:
+    excel_reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))
+
+    eager_pandas = excel_reader.load_sheet_eager("With unnamed columns").to_pandas()
+    lazy_pandas = excel_reader.load_sheet("With unnamed columns").to_pandas()
+    pd_assert_frame_equal(eager_pandas, lazy_pandas)
+
+    eager_polars = pl.from_arrow(data=excel_reader.load_sheet_eager("With unnamed columns"))
+    assert isinstance(eager_polars, pl.DataFrame)
+    lazy_polars = excel_reader.load_sheet("With unnamed columns").to_polars()
+    pl_assert_frame_equal(eager_polars, lazy_polars)
+
+
+def test_eager_with_an_ods_file_should_return_a_recordbatch() -> None:
+    ods_reader = fastexcel.read_excel(path_for_fixture("dates.ods"))
+
+    record_batch = ods_reader.load_sheet_eager(0)
+    assert isinstance(record_batch, RecordBatch)
+    pl_df = pl.from_arrow(record_batch)
+    assert isinstance(pl_df, pl.DataFrame)
+    pl_assert_frame_equal(
+        pl_df,
+        pl.DataFrame(
+            {
+                "date": [date(2023, 6, 1)],
+                "datestr": ["2023-06-01T02:03:04+02:00"],
+                "time": [timedelta(hours=1, minutes=2, seconds=3)],
+                "datetime": [datetime(2023, 6, 1, 2, 3, 4)],
+            }
+        ).with_columns(*(pl.col(col).dt.cast_time_unit("ms") for col in ("datetime", "time"))),
+    )
diff --git a/src/error.rs b/src/error.rs
index e5e2226..29a1233 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,5 +1,7 @@
 use std::{error::Error, fmt::Display};
 
+use calamine::XlsxError;
+
 use crate::types::idx_or_name::IdxOrName;
 
 #[derive(Debug)]
@@ -14,6 +16,7 @@ pub(crate) enum FastExcelErrorKind {
     // the actual type has not much value for us, so we just store a string context
     ArrowError(String),
     InvalidParameters(String),
+    Internal(String),
 }
 
 impl Display for FastExcelErrorKind {
@@ -41,6 +44,7 @@ impl Display for FastExcelErrorKind {
             }
             FastExcelErrorKind::ArrowError(err) => write!(f, "arrow error: {err}"),
             FastExcelErrorKind::InvalidParameters(err) => write!(f, "invalid parameters: {err}"),
+            FastExcelErrorKind::Internal(err) => write!(f, "fastexcel error: {err}"),
         }
     }
 }
@@ -99,6 +103,12 @@ impl From<FastExcelErrorKind> for FastExcelError {
     }
 }
 
+impl From<XlsxError> for FastExcelError {
+    fn from(err: XlsxError) -> Self {
+        FastExcelErrorKind::CalamineError(calamine::Error::Xlsx(err)).into()
+    }
+}
+
 pub(crate) type FastExcelResult<T> = Result<T, FastExcelError>;
 
 impl<T> ErrorContext for FastExcelResult<T> {
@@ -181,6 +191,13 @@ pub(crate) mod py_errors {
         FastExcelError,
         "Provided parameters are invalid"
     );
+    // Internal error
+    create_exception!(
+        _fastexcel,
+        InternalError,
+        FastExcelError,
+        "Internal fastexcel error"
+    );
 
     pub(crate) trait IntoPyResult {
         type Inner;
@@ -217,6 +234,7 @@ pub(crate) mod py_errors {
                         FastExcelErrorKind::InvalidParameters(_) => {
                             InvalidParametersError::new_err(message)
                         }
+                        FastExcelErrorKind::Internal(_) => InternalError::new_err(message),
                     })
                 }
             }
diff --git a/src/lib.rs b/src/lib.rs
index 3ae7070..2fe608e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
 mod error;
 mod types;
+mod utils;
 
 use error::{py_errors, ErrorContext};
 use pyo3::prelude::*;
diff --git a/src/types/dtype.rs b/src/types/dtype.rs
index fb544ba..e77fe53 100644
--- a/src/types/dtype.rs
+++ b/src/types/dtype.rs
@@ -1,11 +1,12 @@
 use std::{
     collections::{HashMap, HashSet},
+    fmt::{Debug, Display},
     str::FromStr,
     sync::OnceLock,
 };
 
 use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
-use calamine::{CellErrorType, Data as CalData, DataType, Range};
+use calamine::{CellErrorType, CellType, DataType, Range};
 use pyo3::{FromPyObject, PyAny, PyObject, PyResult, Python, ToPyObject};
 
 use crate::error::{py_errors::IntoPyResult, FastExcelError, FastExcelErrorKind, FastExcelResult};
@@ -45,9 +46,9 @@ impl FromStr for DType {
     }
 }
 
-impl ToString for DType {
-    fn to_string(&self) -> String {
-        match self {
+impl Display for DType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
             DType::Null => "null",
             DType::Int => "int",
             DType::Float => "float",
@@ -56,8 +57,7 @@ impl ToString for DType {
             DType::DateTime => "datetime",
             DType::Date => "date",
             DType::Duration => "duration",
-        }
-        .to_string()
+        })
     }
 }
 
@@ -104,42 +104,68 @@ const NULL_STRING_VALUES: [&str; 19] = [
     "<NA>", "N/A", "NA", "NULL", "NaN", "None", "n/a", "nan", "null",
 ];
 
-fn get_cell_dtype(data: &Range<CalData>, row: usize, col: usize) -> FastExcelResult<DType> {
+fn get_cell_dtype<DT: CellType + Debug + DataType>(
+    data: &Range<DT>,
+    row: usize,
+    col: usize,
+) -> FastExcelResult<DType> {
     let cell = data
         .get((row, col))
         .ok_or_else(|| FastExcelErrorKind::CannotRetrieveCellData(row, col))?;
 
-    match cell {
-        CalData::Int(_) => Ok(DType::Int),
-        CalData::Float(_) => Ok(DType::Float),
-        CalData::String(v) => match v {
-            v if NULL_STRING_VALUES.contains(&v.as_str()) => Ok(DType::Null),
-            _ => Ok(DType::String),
-        },
-        CalData::Bool(_) => Ok(DType::Bool),
+    if cell.is_int() {
+        Ok(DType::Int)
+    } else if cell.is_float() {
+        Ok(DType::Float)
+    } else if cell.is_string() {
+        if NULL_STRING_VALUES.contains(&cell.get_string().unwrap()) {
+            Ok(DType::Null)
+        } else {
+            Ok(DType::String)
+        }
+    } else if cell.is_bool() {
+        Ok(DType::Bool)
+    } else if cell.is_datetime() {
         // Since calamine 0.24.0, a new ExcelDateTime exists for the Datetime type. It can either be
         // a duration or a datatime
-        CalData::DateTime(excel_datetime) => Ok(if excel_datetime.is_datetime() {
+        let excel_datetime = cell
+            .get_datetime()
+            .expect("calamine indicated that cell is a datetime but get_datetime returned None");
+        Ok(if excel_datetime.is_datetime() {
             DType::DateTime
         } else {
             DType::Duration
-        }),
-        // These types contain an ISO8601 representation of a date/datetime or a duration
-        CalData::DateTimeIso(_) => match cell.as_datetime() {
-            Some(_) => Ok(DType::DateTime),
+        })
+    }
+    // These types contain an ISO8601 representation of a date/datetime or a duration
+    else if cell.is_datetime_iso() {
+        match cell.as_datetime() {
+            Some(_) => Ok(DType::DateTime),
             // If we cannot convert the cell to a datetime, we're working on a date
             // NOTE: not using the Date64 type on purpose, as pyarrow converts it to a datetime
             // rather than a date
             None => Ok(DType::Date),
-        },
-        // A simple duration
-        CalData::DurationIso(_) => Ok(DType::Duration),
-        // Errors and nulls
-        CalData::Error(err) => match err {
-            CellErrorType::NA | CellErrorType::Value | CellErrorType::Null => Ok(DType::Null),
-            _ => Err(FastExcelErrorKind::CalamineCellError(err.to_owned()).into()),
-        },
-        CalData::Empty => Ok(DType::Null),
+        }
+    }
+    // Simple durations
+    else if cell.is_duration_iso() {
+        Ok(DType::Duration)
+    }
+    // Empty cell
+    else if cell.is_empty() {
+        Ok(DType::Null)
+    } else if cell.is_error() {
+        match cell.get_error() {
+            // considering cells with #N/A! as null
+            Some(CellErrorType::NA | CellErrorType::Value | CellErrorType::Null) => Ok(DType::Null),
+            Some(err) => Err(FastExcelErrorKind::CalamineCellError(err.to_owned()).into()),
+            None => Err(FastExcelErrorKind::Internal(format!(
+                "cell is an error but get_error returned None: {cell:?}"
+            ))
+            .into()),
+        }
+    } else {
+        Err(FastExcelErrorKind::Internal(format!("unsupported cell type: {cell:?}")).into())
     }
 }
 
@@ -159,8 +185,8 @@ fn string_types() -> &'static HashSet<DType> {
     STRING_TYPES_CELL.get_or_init(|| HashSet::from([DType::Int, DType::Float, DType::String]))
 }
 
-pub(crate) fn get_dtype_for_column(
-    data: &Range<CalData>,
+pub(crate) fn get_dtype_for_column<DT: CellType + Debug + DataType>(
+    data: &Range<DT>,
     start_row: usize,
     end_row: usize,
     col: usize,
@@ -198,7 +224,7 @@ pub(crate) fn get_dtype_for_column(
 
 #[cfg(test)]
 mod tests {
-    use calamine::Cell;
+    use calamine::{Cell, Data as CalData};
     use rstest::{fixture, rstest};
 
     use super::*;
diff --git a/src/types/python/excelreader.rs b/src/types/python/excelreader.rs
index e295e6c..35fdec6 100644
--- a/src/types/python/excelreader.rs
+++ b/src/types/python/excelreader.rs
@@ -3,8 +3,15 @@ use std::{
     io::{BufReader, Cursor},
 };
 
-use calamine::{open_workbook_auto, open_workbook_auto_from_rs, Data, Range, Reader, Sheets};
-use pyo3::{pyclass, pymethods, PyAny, PyResult};
+use arrow::{
+    datatypes::{Field, Schema},
+    pyarrow::ToPyArrow,
+    record_batch::RecordBatch,
+};
+use calamine::{
+    open_workbook_auto, open_workbook_auto_from_rs, Data, DataRef, Range, Reader, Sheets,
+};
+use pyo3::{prelude::PyObject, pyclass, pymethods, IntoPy, PyAny, PyResult, Python};
 
 use crate::{
     error::{
@@ -13,6 +20,13 @@ use crate::{
     types::{dtype::DTypeMap, idx_or_name::IdxOrName},
 };
 
+use crate::utils::schema::get_schema_sample_rows;
+
+use super::excelsheet::record_batch_from_data_and_schema;
+use super::excelsheet::{
+    column_info::{build_available_columns, build_available_columns_info},
+    sheet_data::ExcelSheetData,
+};
 use super::excelsheet::{ExcelSheet, Header, Pagination, SelectedColumns};
 
 enum ExcelSheets {
@@ -37,6 +51,25 @@ impl ExcelSheets {
             Self::Bytes(sheets) => sheets.sheet_names(),
         }
     }
+
+    fn supports_by_ref(&self) -> bool {
+        matches!(
+            self,
+            Self::File(Sheets::Xlsx(_)) | Self::Bytes(Sheets::Xlsx(_))
+        )
+    }
+
+    fn worksheet_range_ref<'a>(&'a mut self, name: &str) -> FastExcelResult<Range<DataRef<'a>>> {
+        match self {
+            ExcelSheets::File(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),
+            ExcelSheets::Bytes(Sheets::Xlsx(sheets)) => Ok(sheets.worksheet_range_ref(name)?),
+            _ => Err(FastExcelErrorKind::Internal(
+                "sheets do not support worksheet_range_ref".to_string(),
+            )
+            .into()),
+        }
+        .with_context(|| format!("Error while loading sheet {name}"))
+    }
 }
 
 #[pyclass(name = "_ExcelReader")]
@@ -48,6 +81,10 @@ pub(crate) struct ExcelReader {
 }
 
 impl ExcelReader {
+    fn build_selected_columns(use_columns: Option<&PyAny>) -> FastExcelResult<SelectedColumns> {
+        use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}"))
+    }
+
     // NOTE: Not implementing TryFrom here, because we're aren't building the file from the passed
     // string, but rather from the file pointed by it. Semantically, try_from_path is clearer
     pub(crate) fn try_from_path(path: &str) -> FastExcelResult<Self> {
@@ -62,8 +99,44 @@ impl ExcelReader {
         })
     }
 
-    fn build_selected_columns(use_columns: Option<&PyAny>) -> FastExcelResult<SelectedColumns> {
-        use_columns.try_into().with_context(|| format!("expected selected columns to be list[str] | list[int] | str | None, got {use_columns:?}"))
+    fn load_sheet_eager(
+        data: &ExcelSheetData,
+        pagination: Pagination,
+        header: Header,
+        sample_rows: Option<usize>,
+        selected_columns: &SelectedColumns,
+        dtypes: Option<&DTypeMap>,
+    ) -> FastExcelResult<RecordBatch> {
+        let offset = header.offset() + pagination.offset();
+        let limit = {
+            let upper_bound = data.height();
+            if let Some(n_rows) = pagination.n_rows() {
+                // minimum value between (offset+n_rows) and the data's height
+                std::cmp::min(offset + n_rows, upper_bound)
+            } else {
+                upper_bound
+            }
+        };
+
+        let sample_rows_limit = get_schema_sample_rows(sample_rows, offset, limit);
+        let available_columns_info = build_available_columns_info(data, selected_columns, &header)?;
+
+        let available_columns = build_available_columns(
+            available_columns_info,
+            data,
+            offset,
+            sample_rows_limit,
+            dtypes,
+        )?;
+
+        let fields = available_columns
+            .iter()
+            .map(Into::<Field>::into)
+            .collect::<Vec<_>>();
+
+        let schema = Schema::new(fields);
+
+        record_batch_from_data_and_schema(schema, data, offset, limit)
     }
 
     #[allow(clippy::too_many_arguments)]
@@ -77,21 +150,44 @@ impl ExcelReader {
         schema_sample_rows: Option<usize>,
         use_columns: Option<&PyAny>,
         dtypes: Option<DTypeMap>,
-    ) -> FastExcelResult<ExcelSheet> {
-        let range = self.sheets.worksheet_range(&name)?;
-
+        eager: bool,
+        py: Python<'_>,
+    ) -> PyResult<PyObject> {
         let header = Header::new(header_row, column_names);
-        let pagination = Pagination::new(skip_rows, n_rows, &range)?;
-        let selected_columns = Self::build_selected_columns(use_columns)?;
-        ExcelSheet::try_new(
-            name,
-            range,
-            header,
-            pagination,
-            schema_sample_rows,
-            selected_columns,
-            dtypes,
-        )
+        let selected_columns = Self::build_selected_columns(use_columns).into_pyresult()?;
+        if eager && self.sheets.supports_by_ref() {
+            let range = self.sheets.worksheet_range_ref(&name).into_pyresult()?;
+            let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
+            Self::load_sheet_eager(
+                &range.into(),
+                pagination,
+                header,
+                schema_sample_rows,
+                &selected_columns,
+                dtypes.as_ref(),
+            )
+            .into_pyresult()
+            .and_then(|rb| rb.to_pyarrow(py))
+        } else {
+            let range = self.sheets.worksheet_range(&name).into_pyresult()?;
+            let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
+            let sheet = ExcelSheet::try_new(
+                name,
+                range.into(),
+                header,
+                pagination,
+                schema_sample_rows,
+                selected_columns,
+                dtypes,
+            )
+            .into_pyresult()?;
+
+            if eager {
+                sheet.to_arrow(py)
+            } else {
+                Ok(sheet.into_py(py))
+            }
+        }
     }
 }
 
@@ -128,6 +224,7 @@ impl ExcelReader {
         schema_sample_rows = 1_000,
         use_columns = None,
         dtypes = None,
+        eager = false,
     ))]
     #[allow(clippy::too_many_arguments)]
     pub fn load_sheet(
@@ -140,7 +237,9 @@ impl ExcelReader {
         schema_sample_rows: Option<usize>,
         use_columns: Option<&PyAny>,
         dtypes: Option<DTypeMap>,
-    ) -> PyResult<ExcelSheet> {
+        eager: bool,
+        py: Python<'_>,
+    ) -> PyResult<PyObject> {
         let name = idx_or_name
             .try_into()
             .and_then(|idx_or_name| match idx_or_name {
@@ -179,7 +278,8 @@ impl ExcelReader {
             schema_sample_rows,
             use_columns,
             dtypes,
+            eager,
+            py,
         )
-        .into_pyresult()
     }
 }
diff --git a/src/types/python/excelsheet/column_info.rs b/src/types/python/excelsheet/column_info.rs
index 8e502ac..0947d22 100644
--- a/src/types/python/excelsheet/column_info.rs
+++ b/src/types/python/excelsheet/column_info.rs
@@ -1,6 +1,6 @@
-use std::{str::FromStr, usize};
+use std::{fmt::Display, str::FromStr};
 
-use calamine::{Data as CalData, Range};
+use arrow::datatypes::Field;
 use pyo3::{pyclass, pymethods, PyResult};
 
 use crate::{
@@ -8,11 +8,13 @@ use crate::{
         py_errors::IntoPyResult, ErrorContext, FastExcelError, FastExcelErrorKind, FastExcelResult,
     },
     types::{
-        dtype::{get_dtype_for_column, DType, DTypeMap},
+        dtype::{DType, DTypeMap},
         idx_or_name::IdxOrName,
     },
 };
 
+use super::{sheet_data::ExcelSheetData, Header, SelectedColumns};
+
 #[derive(Debug, Clone, PartialEq)]
 pub(crate) enum ColumnNameFrom {
     Provided,
@@ -36,14 +38,13 @@ impl FromStr for ColumnNameFrom {
     }
 }
 
-impl ToString for ColumnNameFrom {
-    fn to_string(&self) -> String {
-        match self {
+impl Display for ColumnNameFrom {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
             ColumnNameFrom::Provided => "provided",
             ColumnNameFrom::LookedUp => "looked_up",
             ColumnNameFrom::Generated => "generated",
-        }
-        .to_string()
+        })
     }
 }
 
@@ -54,14 +55,13 @@ pub(crate) enum DTypeFrom {
     Guessed,
 }
 
-impl ToString for DTypeFrom {
-    fn to_string(&self) -> String {
-        match self {
+impl Display for DTypeFrom {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
             DTypeFrom::ProvidedByIndex => "provided_by_index",
             DTypeFrom::ProvidedByName => "provided_by_name",
             DTypeFrom::Guessed => "guessed",
-        }
-        .to_string()
+        })
     }
 }
 
@@ -127,6 +127,12 @@ impl ColumnInfo {
     }
 }
 
+impl From<&ColumnInfo> for Field {
+    fn from(col_info: &ColumnInfo) -> Self {
+        Field::new(col_info.name(), col_info.dtype().into(), true)
+    }
+}
+
 #[pymethods]
 impl ColumnInfo {
     /// Creates a new ColumnInfo object.
@@ -182,7 +188,7 @@ impl ColumnInfo {
     }
 
     pub fn __repr__(&self) -> String {
-        format!("ColumnInfo(name=\"{name}\", index={index}, dtype=\"{dtype}\", dtype_from=\"{dtype_from}\", column_name_from=\"{column_name_from}\" )", name=self.name, index=self.index, dtype=self.dtype.to_string(), dtype_from=self.dtype_from.to_string(), column_name_from=self.column_name_from.to_string())
+        format!("ColumnInfo(name=\"{name}\", index={index}, dtype=\"{dtype}\", dtype_from=\"{dtype_from}\", column_name_from=\"{column_name_from}\" )", name=self.name, index=self.index, dtype=self.dtype, dtype_from=self.dtype_from, column_name_from=self.column_name_from)
     }
 
     pub fn __eq__(&self, other: &Self) -> bool {
@@ -191,7 +197,7 @@ impl ColumnInfo {
 }
 
 #[derive(Debug)]
-pub(super) struct ColumnInfoBuilder {
+pub(crate) struct ColumnInfoBuilder {
     name: String,
     index: usize,
     column_name_from: ColumnNameFrom,
@@ -227,7 +233,7 @@ impl ColumnInfoBuilder {
 
     fn dtype_info(
         &self,
-        data: &Range<CalData>,
+        data: &ExcelSheetData<'_>,
         start_row: usize,
         end_row: usize,
         specified_dtypes: Option<&DTypeMap>,
@@ -247,14 +253,14 @@ impl ColumnInfoBuilder {
             .map(FastExcelResult::Ok)
             // If we could not look up a dtype, guess it from the data
             .unwrap_or_else(|| {
-                get_dtype_for_column(data, start_row, end_row, self.index)
+                data.dtype_for_column(start_row, end_row, self.index)
                     .map(|dtype| (dtype, DTypeFrom::Guessed))
             })
     }
 
     pub(super) fn finish(
         self,
-        data: &Range<CalData>,
+        data: &ExcelSheetData<'_>,
         start_row: usize,
         end_row: usize,
         specified_dtypes: Option<&DTypeMap>,
@@ -271,3 +277,144 @@ impl ColumnInfoBuilder {
         ))
     }
 }
+
+pub(crate) fn build_available_columns_info(
+    data: &ExcelSheetData<'_>,
+    selected_columns: &SelectedColumns,
+    header: &Header,
+) -> FastExcelResult<Vec<ColumnInfoBuilder>> {
+    let width = data.width();
+    match header {
+        Header::None => Ok((0..width)
+            .map(|col_idx| {
+                ColumnInfoBuilder::new(
+                    format!("__UNNAMED__{col_idx}"),
+                    col_idx,
+                    ColumnNameFrom::Generated,
+                )
+            })
+            .collect()),
+        Header::At(row_idx) => Ok((0..width)
+            .map(|col_idx| {
+                data.get_as_string((*row_idx, col_idx))
+                    .map(|col_name| {
+                        ColumnInfoBuilder::new(col_name, col_idx, ColumnNameFrom::LookedUp)
+                    })
+                    .unwrap_or_else(|| {
+                        ColumnInfoBuilder::new(
+                            format!("__UNNAMED__{col_idx}"),
+                            col_idx,
+                            ColumnNameFrom::Generated,
+                        )
+                    })
+            })
+            .collect()),
+        Header::With(names) => {
+            if let SelectedColumns::Selection(column_selection) = selected_columns {
+                if column_selection.len() != names.len() {
+                    return Err(FastExcelErrorKind::InvalidParameters(
+                        "column_names and use_columns must have the same length".to_string(),
+                    )
+                    .into());
+                }
+                let selected_indices = column_selection
+                        .iter()
+                        .map(|idx_or_name| {
+                            match idx_or_name {
+                        IdxOrName::Idx(idx) => Ok(*idx),
+                        IdxOrName::Name(name) => Err(FastExcelErrorKind::InvalidParameters(
+                            format!("use_columns can only contain integers when used with columns_names, got \"{name}\"")
+                        )
+                        .into()),
+                    }
+                        })
+                        .collect::<FastExcelResult<Vec<_>>>()?;
+
+                Ok((0..width)
+                    .map(|col_idx| {
+                        let provided_name_opt = if let Some(pos_in_names) =
+                            selected_indices.iter().position(|idx| idx == &col_idx)
+                        {
+                            names.get(pos_in_names).cloned()
+                        } else {
+                            None
+                        };
+
+                        match provided_name_opt {
+                            Some(provided_name) => ColumnInfoBuilder::new(
+                                provided_name,
+                                col_idx,
+                                ColumnNameFrom::Provided,
+                            ),
+                            None => ColumnInfoBuilder::new(
+                                format!("__UNNAMED__{col_idx}"),
+                                col_idx,
+                                ColumnNameFrom::Generated,
+                            ),
+                        }
+                    })
+                    .collect())
+            } else {
+                let nameless_start_idx = names.len();
+                Ok(names
+                    .iter()
+                    .enumerate()
+                    .map(|(col_idx, name)| {
+                        ColumnInfoBuilder::new(name.to_owned(), col_idx, ColumnNameFrom::Provided)
+                    })
+                    .chain((nameless_start_idx..width).map(|col_idx| {
+                        ColumnInfoBuilder::new(
+                            format!("__UNNAMED__{col_idx}"),
+                            col_idx,
+                            ColumnNameFrom::Generated,
+                        )
+                    }))
+                    .collect())
+            }
+        }
+    }
+}
+
+fn alias_for_name(name: &str, existing_names: &[String]) -> String {
+    #[inline]
+    fn rec(name: &str, existing_names: &[String], depth: usize) -> String {
+        let alias = if depth == 0 {
+            name.to_owned()
+        } else {
+            format!("{name}_{depth}")
+        };
+        match existing_names
+            .iter()
+            .any(|existing_name| existing_name == &alias)
+        {
+            true => rec(name, existing_names, depth + 1),
+            false => alias,
+        }
+    }
+
+    rec(name, existing_names, 0)
+}
+
+pub(crate) fn build_available_columns(
+    available_columns_info: Vec<ColumnInfoBuilder>,
+    data: &ExcelSheetData,
+    start_row: usize,
+    end_row: usize,
+    specified_dtypes: Option<&DTypeMap>,
+) -> FastExcelResult<Vec<ColumnInfo>> {
+    let mut aliased_available_columns = Vec::with_capacity(available_columns_info.len());
+
+    available_columns_info
+        .into_iter()
+        .map(|mut column_info_builder| {
+            // Setting the right alias for every column
+            let alias = alias_for_name(column_info_builder.name(), &aliased_available_columns);
+            if alias != column_info_builder.name() {
+                column_info_builder = column_info_builder.with_name(alias.clone());
+            }
+            aliased_available_columns.push(alias);
+            // Setting the dtype info
+            column_info_builder.finish(data, start_row, end_row, specified_dtypes)
+        })
+        .collect()
+}
diff --git a/src/types/python/excelsheet/mod.rs b/src/types/python/excelsheet/mod.rs
index 4792891..6f54f59 100644
--- a/src/types/python/excelsheet/mod.rs
+++ b/src/types/python/excelsheet/mod.rs
@@ -1,6 +1,8 @@
 pub(crate) mod column_info;
+pub(crate) mod sheet_data;
 
-use std::{cmp, collections::HashSet, str::FromStr, sync::Arc};
+use calamine::{CellType, Range};
+use std::{cmp, collections::HashSet, fmt::Debug, str::FromStr, sync::Arc};
 
 use crate::{
     error::{
@@ -11,18 +13,14 @@ use crate::{
         idx_or_name::IdxOrName,
     },
 };
+use sheet_data::ExcelSheetData;
 
 use arrow::{
-    array::{
-        Array, BooleanArray, Date32Array, DurationMillisecondArray, Float64Array, Int64Array,
-        NullArray, StringArray, TimestampMillisecondArray,
-    },
-    datatypes::{Field, Schema},
+    array::NullArray,
+    datatypes::{DataType as ArrowDataType, Field, Schema, TimeUnit},
     pyarrow::ToPyArrow,
     record_batch::RecordBatch,
 };
-use calamine::{Data as CalData, DataType, Range};
-use chrono::NaiveDate;
 
 use pyo3::{
     prelude::{pyclass, pymethods, PyObject, Python},
@@ -30,7 +28,13 @@ use pyo3::{
     PyAny, PyResult, ToPyObject,
 };
 
-use self::column_info::{ColumnInfo, ColumnInfoBuilder, ColumnNameFrom};
+use crate::utils::schema::get_schema_sample_rows;
+
+use self::column_info::{build_available_columns, build_available_columns_info, ColumnInfo};
+use self::sheet_data::{
+    create_boolean_array, create_date_array, create_datetime_array, create_duration_array,
+    create_float_array, create_int_array, create_string_array,
+};
 
 #[derive(Debug)]
 pub(crate) enum Header {
@@ -65,10 +69,10 @@ pub(crate) struct Pagination {
 }
 
 impl Pagination {
-    pub(crate) fn new(
+    pub(crate) fn new<CT: CellType>(
         skip_rows: usize,
         n_rows: Option<usize>,
-        range: &Range<CalData>,
+        range: &Range<CT>,
     ) -> FastExcelResult<Self> {
         let max_height = range.height();
         if max_height < skip_rows {
@@ -84,6 +88,10 @@ impl Pagination {
     pub(crate) fn offset(&self) -> usize {
         self.skip_rows
     }
+
+    pub(crate) fn n_rows(&self) -> Option<usize> {
+        self.n_rows
+    }
 }
 impl TryFrom<&PyList> for SelectedColumns {
     type Error = FastExcelError;
@@ -137,6 +145,7 @@ impl SelectedColumns {
                 .collect(),
         }
     }
+
     const ALPHABET: [char; 26] = [
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
         'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
@@ -276,32 +285,13 @@ impl TryFrom<Option<&PyAny>> for SelectedColumns {
     }
 }
 
-fn alias_for_name(name: &str, existing_names: &[String]) -> String {
-    fn rec(name: &str, existing_names: &[String], depth: usize) -> String {
-        let alias = if depth == 0 {
-            name.to_owned()
-        } else {
-            format!("{name}_{depth}")
-        };
-        match existing_names
-            .iter()
-            .any(|existing_name| existing_name == &alias)
-        {
-            true => rec(name, existing_names, depth + 1),
-            false => alias,
-        }
-    }
-
-    rec(name, existing_names, 0)
-}
-
 #[pyclass(name = "_ExcelSheet")]
 pub(crate) struct ExcelSheet {
     #[pyo3(get)]
     pub(crate) name: String,
     header: Header,
     pagination: Pagination,
-    data: Range<CalData>,
+    data: ExcelSheetData<'static>,
     height: Option<usize>,
     total_height: Option<usize>,
     width: Option<usize>,
@@ -313,19 +303,21 @@ pub(crate) struct ExcelSheet {
 }
 
 impl ExcelSheet {
-    pub(crate) fn data(&self) -> &Range<CalData> {
+    pub(crate) fn data(&self) -> &ExcelSheetData<'_> {
         &self.data
     }
 
     pub(crate) fn try_new(
         name: String,
-        data: Range<CalData>,
+        data: ExcelSheetData<'static>,
         header: Header,
         pagination: Pagination,
         schema_sample_rows: Option<usize>,
         selected_columns: SelectedColumns,
         dtypes: Option<DTypeMap>,
     ) -> FastExcelResult<Self> {
+        let available_columns_info =
+            build_available_columns_info(&data, &selected_columns, &header)?;
         let mut sheet = ExcelSheet {
             name,
             header,
@@ -341,33 +333,17 @@ impl ExcelSheet {
             selected_columns: Vec::with_capacity(0),
         };
 
-        let available_columns_info = sheet.get_available_columns_info(&selected_columns)?;
-
-        let mut aliased_available_columns = Vec::with_capacity(available_columns_info.len());
-
-        let dtype_sample_rows =
-            sheet.offset() + sheet.schema_sample_rows().unwrap_or(sheet.limit());
-        let row_limit = cmp::min(dtype_sample_rows, sheet.limit());
+        let row_limit = sheet.schema_sample_rows();
 
         // Finalizing column info
-        let available_columns = available_columns_info
-            .into_iter()
-            .map(|mut column_info_builder| {
-                // Setting the right alias for every column
-                let alias = alias_for_name(column_info_builder.name(), &aliased_available_columns);
-                if alias != column_info_builder.name() {
-                    column_info_builder = column_info_builder.with_name(alias.clone());
-                }
-                aliased_available_columns.push(alias);
-                // Setting the dtype info
-                column_info_builder.finish(
-                    &sheet.data,
-                    sheet.offset(),
-                    row_limit,
-                    sheet.dtypes.as_ref(),
-                )
-            })
-            .collect::<FastExcelResult<Vec<_>>>()?;
+        let available_columns = build_available_columns(
+            available_columns_info,
+            &sheet.data,
+            sheet.offset(),
+            row_limit,
+            sheet.dtypes.as_ref(),
+        )?;
+
         let selected_columns = selected_columns.select_columns(&available_columns)?;
         sheet.available_columns = available_columns;
         sheet.selected_columns = selected_columns;
@@ -376,108 +352,6 @@ impl ExcelSheet {
         Ok(sheet)
     }
 
-    fn get_available_columns_info(
-        &self,
-        selected_columns: &SelectedColumns,
-    ) -> FastExcelResult<Vec<ColumnInfoBuilder>> {
-        let width = self.data.width();
-        match &self.header {
-            Header::None => Ok((0..width)
-                .map(|col_idx| {
-                    ColumnInfoBuilder::new(
-                        format!("__UNNAMED__{col_idx}"),
-                        col_idx,
-                        ColumnNameFrom::Generated,
-                    )
-                })
-                .collect()),
-            Header::At(row_idx) => Ok((0..width)
-                .map(|col_idx| {
-                    self.data
-                        .get((*row_idx, col_idx))
-                        .and_then(|data| data.as_string())
-                        .map(|col_name| {
-                            ColumnInfoBuilder::new(col_name, col_idx, ColumnNameFrom::LookedUp)
-                        })
-                        .unwrap_or_else(|| {
-                            ColumnInfoBuilder::new(
-                                format!("__UNNAMED__{col_idx}"),
-                                col_idx,
-                                ColumnNameFrom::Generated,
-                            )
-                        })
-                })
-                .collect()),
-            Header::With(names) => {
-                if let SelectedColumns::Selection(column_selection) = selected_columns {
-                    if column_selection.len() != names.len() {
-                        return Err(FastExcelErrorKind::InvalidParameters(
-                            "column_names and use_columns must have the same length".to_string(),
-                        )
-                        .into());
-                    }
-                    let selected_indices = column_selection
-                        .iter()
-                        .map(|idx_or_name| {
-                            match idx_or_name {
-                        IdxOrName::Idx(idx) => Ok(*idx),
-                        IdxOrName::Name(name) => Err(FastExcelErrorKind::InvalidParameters(
-                            format!("use_columns can only contain integers when used with columns_names, got \"{name}\"")
-                        )
-                        .into()),
-                    }
-                        })
-                        .collect::<FastExcelResult<Vec<_>>>()?;
-
-                    Ok((0..width)
-                        .map(|col_idx| {
-                            let provided_name_opt = if let Some(pos_in_names) =
-                                selected_indices.iter().position(|idx| idx == &col_idx)
-                            {
-                                names.get(pos_in_names).cloned()
-                            } else {
-                                None
-                            };
-
-                            match provided_name_opt {
-                                Some(provided_name) => ColumnInfoBuilder::new(
-                                    provided_name,
-                                    col_idx,
-                                    ColumnNameFrom::Provided,
-                                ),
-                                None => ColumnInfoBuilder::new(
-                                    format!("__UNNAMED__{col_idx}"),
-                                    col_idx,
-                                    ColumnNameFrom::Generated,
-                                ),
-                            }
-                        })
-                        .collect())
-                } else {
-                    let nameless_start_idx = names.len();
-                    Ok(names
-                        .iter()
-                        .enumerate()
-                        .map(|(col_idx, name)| {
-                            ColumnInfoBuilder::new(
-                                name.to_owned(),
-                                col_idx,
-                                ColumnNameFrom::Provided,
-                            )
-                        })
-                        .chain((nameless_start_idx..width).map(|col_idx| {
-                            ColumnInfoBuilder::new(
-                                format!("__UNNAMED__{col_idx}"),
-                                col_idx,
-                                ColumnNameFrom::Generated,
-                            )
-                        }))
-                        .collect())
-                }
-            }
-        }
-    }
-
     pub(crate) fn limit(&self) -> usize {
         let upper_bound = self.data.height();
         if let Some(n_rows) = self.pagination.n_rows {
@@ -490,125 +364,63 @@ impl ExcelSheet {
         upper_bound
     }
 
-    pub(crate) fn schema_sample_rows(&self) -> &Option<usize> {
-        &self.schema_sample_rows
+    pub(crate) fn schema_sample_rows(&self) -> usize {
+        get_schema_sample_rows(self.schema_sample_rows, self.offset(), self.limit())
     }
 }
 
-fn create_boolean_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(BooleanArray::from_iter((offset..limit).map(|row| {
-        data.get((row, col)).and_then(|cell| match cell {
-            CalData::Bool(b) => Some(*b),
-            CalData::Int(i) => Some(*i != 0),
-            CalData::Float(f) => Some(*f != 0.0),
-            _ => None,
-        })
-    })))
-}
-
-fn create_int_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(Int64Array::from_iter(
-        (offset..limit).map(|row| data.get((row, col)).and_then(|cell| cell.as_i64())),
-    ))
-}
-
-fn create_float_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(Float64Array::from_iter(
-        (offset..limit).map(|row| data.get((row, col)).and_then(|cell| cell.as_f64())),
-    ))
-}
-
-fn create_string_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(StringArray::from_iter((offset..limit).map(|row| {
-        // NOTE: Not using cell.as_string() here because it matches the String variant last, which
-        // is slower for columns containing mostly/only strings (which we expect to meet more often than
-        // mixed dtype columns containing mostly numbers)
-        data.get((row, col)).and_then(|cell| match cell {
-            CalData::String(s) => Some(s.to_string()),
-            CalData::Float(s) => Some(s.to_string()),
-            CalData::Int(s) => Some(s.to_string()),
-            CalData::DateTime(dt) => dt.as_datetime().map(|dt| dt.to_string()),
-            CalData::DateTimeIso(dt) => Some(dt.to_string()),
-            _ => None,
-        })
-    })))
-}
-
-fn duration_type_to_i64(caldt: &CalData) -> Option<i64> {
-    caldt.as_duration().map(|d| d.num_milliseconds())
-}
-
-fn create_date_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
-    Arc::new(Date32Array::from_iter((offset..limit).map(|row| {
-        data.get((row, col))
-            .and_then(|caldate| caldate.as_date())
-            .and_then(|date| i32::try_from(date.signed_duration_since(epoch).num_days()).ok())
-    })))
-}
-
-fn create_datetime_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(TimestampMillisecondArray::from_iter((offset..limit).map(
-        |row| {
-            data.get((row, col))
-                .and_then(|caldt| caldt.as_datetime())
-                .map(|dt| dt.and_utc().timestamp_millis())
-        },
-    )))
-}
-
-fn create_duration_array(
-    data: &Range<CalData>,
-    col: usize,
-    offset: usize,
-    limit: usize,
-) -> Arc<dyn Array> {
-    Arc::new(DurationMillisecondArray::from_iter(
-        (offset..limit).map(|row| data.get((row, col)).and_then(duration_type_to_i64)),
-    ))
-}
-
 impl From<&ExcelSheet> for Schema {
     fn from(sheet: &ExcelSheet) -> Self {
         let fields: Vec<_> = sheet
             .selected_columns
             .iter()
-            .map(|col_info| Field::new(col_info.name(), col_info.dtype().into(), true))
+            .map(Into::<Field>::into)
             .collect();
         Schema::new(fields)
     }
 }
 
+pub(crate) fn record_batch_from_data_and_schema(
+    schema: Schema,
+    data: &ExcelSheetData,
+    offset: usize,
+    limit: usize,
+) -> FastExcelResult<RecordBatch> {
+    let mut iter = schema
+        .fields()
+        .iter()
+        .enumerate()
+        .map(|(col_idx, field)| {
+            (
+                field.name(),
+                match field.data_type() {
+                    ArrowDataType::Boolean => create_boolean_array(data, col_idx, offset, limit),
+                    ArrowDataType::Int64 => create_int_array(data, col_idx, offset, limit),
+                    ArrowDataType::Float64 => create_float_array(data, col_idx, offset, limit),
+                    ArrowDataType::Utf8 => create_string_array(data, col_idx, offset, limit),
+                    ArrowDataType::Timestamp(TimeUnit::Millisecond, None) => {
+                        create_datetime_array(data, col_idx, offset, limit)
+                    }
+                    ArrowDataType::Date32 => create_date_array(data, col_idx, offset, limit),
+                    ArrowDataType::Duration(TimeUnit::Millisecond) => {
+                        create_duration_array(data, col_idx, offset, limit)
+                    }
+                    ArrowDataType::Null => Arc::new(NullArray::new(limit - offset)),
+                    _ => unreachable!(),
+                },
+            )
+        })
+        .peekable();
+    // If the iterable is empty, try_from_iter returns an Err
+    if iter.peek().is_none() {
+        Ok(RecordBatch::new_empty(Arc::new(schema)))
+    } else {
+        RecordBatch::try_from_iter(iter)
+            .map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
+            .with_context(|| "could not create RecordBatch from iterable")
+    }
+}
+
 impl TryFrom<&ExcelSheet> for RecordBatch {
     type Error = FastExcelError;
 
diff --git a/src/types/python/excelsheet/sheet_data.rs b/src/types/python/excelsheet/sheet_data.rs
new file mode 100644
index 0000000..faa343c
--- /dev/null
+++ b/src/types/python/excelsheet/sheet_data.rs
@@ -0,0 +1,207 @@
+use std::sync::Arc;
+
+use arrow::array::Array;
+use calamine::{Data as CalData, DataRef as CalDataRef, DataType, Range};
+
+use crate::{
+    error::FastExcelResult,
+    types::dtype::{get_dtype_for_column, DType},
+};
+
+pub(crate) enum ExcelSheetData<'r> {
+    Owned(Range<CalData>),
+    Ref(Range<CalDataRef<'r>>),
+}
+
+impl ExcelSheetData<'_> {
+    pub(crate) fn width(&self) -> usize {
+        match self {
+            ExcelSheetData::Owned(range) => range.width(),
+            ExcelSheetData::Ref(range) => range.width(),
+        }
+    }
+
+    pub(crate) fn height(&self) -> usize {
+        match self {
+            ExcelSheetData::Owned(range) => range.height(),
+            ExcelSheetData::Ref(range) => range.height(),
+        }
+    }
+
+    pub(super) fn get_as_string(&self, pos: (usize, usize)) -> Option<String> {
+        match self {
+            ExcelSheetData::Owned(range) => range.get(pos).and_then(|data| data.as_string()),
+            ExcelSheetData::Ref(range) => range.get(pos).and_then(|data| data.as_string()),
+        }
+    }
+
+    pub(crate) fn dtype_for_column(
+        &self,
+        start_row: usize,
+        end_row: usize,
+        col: usize,
+    ) -> FastExcelResult<DType> {
+        match self {
+            ExcelSheetData::Owned(data) => get_dtype_for_column(data, start_row, end_row, col),
+            ExcelSheetData::Ref(data) => get_dtype_for_column(data, start_row, end_row, col),
+        }
+    }
+}
+
+impl From<Range<CalData>> for ExcelSheetData<'_> {
+    fn from(range: Range<CalData>) -> Self {
+        Self::Owned(range)
+    }
+}
+
+impl<'a> From<Range<CalDataRef<'a>>> for ExcelSheetData<'a> {
+    fn from(range: Range<CalDataRef<'a>>) -> Self {
+        Self::Ref(range)
+    }
+}
+
+mod array_impls {
+    use std::sync::Arc;
+
+    use arrow::array::{
+        Array, BooleanArray, Date32Array, DurationMillisecondArray, Float64Array, Int64Array,
+        StringArray, TimestampMillisecondArray,
+    };
+    use calamine::{CellType, DataType, Range};
+    use chrono::NaiveDate;
+
+    pub(super) fn create_boolean_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(BooleanArray::from_iter((offset..limit).map(|row| {
+            data.get((row, col)).and_then(|cell| {
+                if let Some(b) = cell.get_bool() {
+                    Some(b)
+                } else if let Some(i) = cell.get_int() {
+                    Some(i != 0)
+                }
+                // clippy wants `else if let Some(f) = ... { Some(x) } else { None }` in the .map form
+                else {
+                    cell.get_float().map(|f| f != 0.0)
+                }
+            })
+        })))
+    }
+
+    pub(super) fn create_int_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(Int64Array::from_iter(
+            (offset..limit).map(|row| data.get((row, col)).and_then(|cell| cell.as_i64())),
+        ))
+    }
+
+    pub(super) fn create_float_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(Float64Array::from_iter(
+            (offset..limit).map(|row| data.get((row, col)).and_then(|cell| cell.as_f64())),
+        ))
+    }
+
+    pub(super) fn create_string_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(StringArray::from_iter((offset..limit).map(|row| {
+            data.get((row, col)).and_then(|cell| {
+                if cell.is_string() {
+                    cell.get_string().map(str::to_string)
+                } else if cell.is_datetime() {
+                    cell.get_datetime()
+                        .and_then(|dt| dt.as_datetime())
+                        .map(|dt| dt.to_string())
+                } else if cell.is_datetime_iso() {
+                    cell.get_datetime_iso().map(str::to_string)
+                } else {
+                    cell.as_string()
+                }
+            })
+        })))
+    }
+
+    fn duration_type_to_i64<DT: CellType + DataType>(caldt: &DT) -> Option<i64> {
+        caldt.as_duration().map(|d| d.num_milliseconds())
+    }
+
+    pub(super) fn create_date_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+        Arc::new(Date32Array::from_iter((offset..limit).map(|row| {
+            data.get((row, col))
+                .and_then(|caldate| caldate.as_date())
+                .and_then(|date| i32::try_from(date.signed_duration_since(epoch).num_days()).ok())
+        })))
+    }
+
+    pub(super) fn create_datetime_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(TimestampMillisecondArray::from_iter((offset..limit).map(
+            |row| {
+                data.get((row, col))
+                    .and_then(|caldt| caldt.as_datetime())
+                    .map(|dt| dt.and_utc().timestamp_millis())
+            },
+        )))
+    }
+
+    pub(super) fn create_duration_array<DT: CellType + DataType>(
+        data: &Range<DT>,
+        col: usize,
+        offset: usize,
+        limit: usize,
+    ) -> Arc<dyn Array> {
+        Arc::new(DurationMillisecondArray::from_iter(
+            (offset..limit).map(|row| data.get((row, col)).and_then(duration_type_to_i64)),
+        ))
+    }
+}
+
+/// Creates a function dispatching `ExcelSheetData` to the generic `create_x_array` implementation
+macro_rules! create_array_function {
+    ($func_name:ident) => {
+        pub(crate) fn $func_name(
+            data: &ExcelSheetData,
+            col: usize,
+            offset: usize,
+            limit: usize,
+        ) -> Arc<dyn Array> {
+            match data {
+                ExcelSheetData::Owned(range) => array_impls::$func_name(range, col, offset, limit),
+                ExcelSheetData::Ref(range) => array_impls::$func_name(range, col, offset, limit),
+            }
+        }
+    };
+}
+
+create_array_function!(create_boolean_array);
+create_array_function!(create_string_array);
+create_array_function!(create_int_array);
+create_array_function!(create_float_array);
+create_array_function!(create_datetime_array);
+create_array_function!(create_date_array);
+create_array_function!(create_duration_array);
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
new file mode 100644
index 0000000..014c083
--- /dev/null
+++ b/src/utils/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod schema;
diff --git a/src/utils/schema.rs b/src/utils/schema.rs
new file mode 100644
index 0000000..79dcbae
--- /dev/null
+++ b/src/utils/schema.rs
@@ -0,0 +1,48 @@
+use std::cmp::min;
+
+/// Determines the row index at which schema sampling stops, based on the provided parameter,
+/// and the sheet's offset and limit.
+///
+/// Note that here, the limit should be retrieved from the sheet's `limit()` method, and must not
+/// be out of the sheet's bounds
+pub(crate) fn get_schema_sample_rows(
+    sample_rows: Option<usize>,
+    offset: usize,
+    limit: usize,
+) -> usize {
+    // Sampling starts at offset and covers sample_rows rows, or runs up to limit (i.e. the entire
+    // column) when sample_rows is not provided. saturating_add prevents a huge sample_rows from
+    // overflowing; anything past the sheet's limit is clamped to the limit right below
+    let sample_rows = offset.saturating_add(sample_rows.unwrap_or(limit));
+    min(sample_rows, limit)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::get_schema_sample_rows;
+    use rstest::rstest;
+
+    #[rstest]
+    // default value, 50 rows sheet, row limit should be 50
+    #[case(Some(1000), 0, 50, 50)]
+    // default value, 5000 rows sheet, row limit should be 1000
+    #[case(Some(1000), 0, 5000, 1000)]
+    // default value, 1500 rows sheet, offset of 1000, row limit should be 1500
+    #[case(Some(1000), 1000, 1500, 1500)]
+    // 100 sampling size, 1500 rows sheet, offset of 1000, row limit should be 1100
+    #[case(Some(100), 1000, 1500, 1100)]
+    // No value, 50 rows sheet, row limit should be 50
+    #[case(None, 0, 50, 50)]
+    // No value, 5000 rows sheet, row limit should be 5000
+    #[case(None, 0, 5000, 5000)]
+    // no value, 1500 rows sheet, offset of 1000, row limit should be 1500
+    #[case(None, 1000, 1500, 1500)]
+    fn test_get_schema_sample_rows_return_values(
+        #[case] sample_rows: Option<usize>,
+        #[case] offset: usize,
+        #[case] limit: usize,
+        #[case] expected: usize,
+    ) {
+        assert_eq!(get_schema_sample_rows(sample_rows, offset, limit), expected);
+    }
+}
diff --git a/test.py b/test.py
index 7ce0f28..5fa3c4a 100644
--- a/test.py
+++ b/test.py
@@ -7,6 +7,13 @@ def get_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument("file")
     parser.add_argument("-c", "--column", type=str, nargs="+", help="the columns to use")
+    parser.add_argument(
+        "--eager", action="store_true", help="whether the sheet should be loaded eagerly"
+    )
+    parser.add_argument(
+        "-i", "--iterations", type=int, help="the number of iterations to do", default=1
+    )
+
     return parser.parse_args()
 
 
@@ -15,8 +22,12 @@ def main():
     excel_file = fastexcel.read_excel(args.file)
     use_columns = args.column or None
 
-    for sheet_name in excel_file.sheet_names:
-        excel_file.load_sheet_by_name(sheet_name, use_columns=use_columns).to_arrow()
+    for _ in range(args.iterations):
+        for sheet_name in excel_file.sheet_names:
+            if args.eager:
+                excel_file.load_sheet_eager(sheet_name, use_columns=use_columns)
+            else:
+                excel_file.load_sheet(sheet_name, use_columns=use_columns).to_arrow()
 
 
 if __name__ == "__main__":