From 04c71d5eef1c2aff000e74d716a8e70d06c0af8d Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Wed, 29 Nov 2023 13:44:51 +0600 Subject: [PATCH 1/2] feat: iterator over rust range --- python/python_calamine/_python_calamine.pyi | 16 +++++++ src/types/cell.rs | 2 +- src/types/sheet.rs | 50 ++++++++++++++++++++- tests/test_base.py | 24 ++++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/python/python_calamine/_python_calamine.pyi b/python/python_calamine/_python_calamine.pyi index 8f9d0ae..cb9688c 100644 --- a/python/python_calamine/_python_calamine.pyi +++ b/python/python_calamine/_python_calamine.pyi @@ -72,6 +72,22 @@ class CalamineSheet: For suppress this behaviour, set `skip_empty_area` to `False`. """ + def iter_rows( + self, + ) -> typing.Iterator[ + list[ + int + | float + | str + | bool + | datetime.time + | datetime.date + | datetime.datetime + | datetime.timedelta + ] + ]: + """Returning data from sheet as iterator of lists.""" + @typing.final class CalamineWorkbook: path: str | None diff --git a/src/types/cell.rs b/src/types/cell.rs index 303204b..5f3483b 100644 --- a/src/types/cell.rs +++ b/src/types/cell.rs @@ -3,7 +3,7 @@ use std::convert::From; use calamine::DataType; use pyo3::prelude::*; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum CellValue { Int(i64), Float(f64), diff --git a/src/types/sheet.rs b/src/types/sheet.rs index ac13d28..4003218 100644 --- a/src/types/sheet.rs +++ b/src/types/sheet.rs @@ -1,7 +1,7 @@ use std::fmt::Display; use std::sync::Arc; -use calamine::{Data, Range, SheetType, SheetVisible}; +use calamine::{Data, Range, Rows, SheetType, SheetVisible}; use pyo3::class::basic::CompareOp; use pyo3::prelude::*; use pyo3::types::PyList; @@ -196,4 +196,52 @@ impl CalamineSheet { }), )) } + + fn iter_rows(&self) -> CalamineCellIterator { + CalamineCellIterator::from_range(Arc::clone(&self.range)) + } +} + +#[pyclass] +pub struct CalamineCellIterator { + position: u32, + start: (u32, u32), + empty_row: Vec<CellValue>, + iter: 
Rows<'static, Data>, + #[allow(dead_code)] + range: Arc<Range<Data>>, +} + +impl CalamineCellIterator { + fn from_range(range: Arc<Range<Data>>) -> CalamineCellIterator { + let mut empty_row = Vec::with_capacity(range.width()); + for _ in 0..range.width() { + empty_row.push(CellValue::String("".to_string())) + } + CalamineCellIterator { + empty_row, + position: 0, + start: range.start().unwrap(), + iter: unsafe { std::mem::transmute(range.rows()) }, + range, + } + } +} + +#[pymethods] +impl CalamineCellIterator { + fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<&PyList> { + slf.position += 1; + if slf.position > slf.start.0 { + slf.iter + .next() + .map(|row| PyList::new(slf.py(), row.iter().map(<&Data as Into<CellValue>>::into))) + } else { + Some(PyList::new(slf.py(), slf.empty_row.clone())) + } + } } diff --git a/tests/test_base.py b/tests/test_base.py index 0b239ef..17e9cc4 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -180,6 +180,30 @@ def test_xlsx_read(): assert [] == reader.get_sheet_by_index(1).to_python(skip_empty_area=False) +def test_xlsx_iter_rows(): + names = ["Sheet1", "Sheet2", "Sheet3"] + data = [ + ["", "", "", "", "", "", "", "", "", ""], + [ + "String", + 1, + 1.1, + True, + False, + date(2010, 10, 10), + datetime(2010, 10, 10, 10, 10, 10), + time(10, 10, 10), + timedelta(hours=10, minutes=10, seconds=10, microseconds=100000), + timedelta(hours=255, minutes=10, seconds=10), + ], + ] + + reader = CalamineWorkbook.from_object(PATH / "base.xlsx") + + assert names == reader.sheet_names + assert data == list(reader.get_sheet_by_index(0).iter_rows()) + + def test_nrows(): reader = CalamineWorkbook.from_object(PATH / "base.xlsx") sheet = reader.get_sheet_by_name("Sheet3") From ba8e6dc700dbe1a6af2ddb7a9920f070d4bd644b Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 15 Jul 2024 14:23:29 +0500 Subject: [PATCH 2/2] fix: change code for pyo3 0.22.1 --- src/types/sheet.rs | 24 ++++++++++++++---------- 1 
file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/types/sheet.rs b/src/types/sheet.rs index 4003218..b51e17e 100644 --- a/src/types/sheet.rs +++ b/src/types/sheet.rs @@ -214,15 +214,19 @@ pub struct CalamineCellIterator { impl CalamineCellIterator { fn from_range(range: Arc<Range<Data>>) -> CalamineCellIterator { - let mut empty_row = Vec::with_capacity(range.width()); - for _ in 0..range.width() { - empty_row.push(CellValue::String("".to_string())) - } + let empty_row = (0..range.width()) + .map(|_| CellValue::String("".to_string())) + .collect(); CalamineCellIterator { empty_row, position: 0, start: range.start().unwrap(), - iter: unsafe { std::mem::transmute(range.rows()) }, + iter: unsafe { + std::mem::transmute::< + calamine::Rows<'_, calamine::Data>, + calamine::Rows<'static, calamine::Data>, + >(range.rows()) + }, range, } } @@ -234,14 +238,14 @@ impl CalamineCellIterator { slf } - fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<&PyList> { + fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<Bound<'_, PyList>> { slf.position += 1; if slf.position > slf.start.0 { - slf.iter - .next() - .map(|row| PyList::new(slf.py(), row.iter().map(<&Data as Into<CellValue>>::into))) + slf.iter.next().map(|row| { + PyList::new_bound(slf.py(), row.iter().map(<&Data as Into<CellValue>>::into)) + }) } else { - Some(PyList::new(slf.py(), slf.empty_row.clone())) + Some(PyList::new_bound(slf.py(), slf.empty_row.clone())) } } }