
fix: Fix duplicate column output and panic for include_file_paths #18255

Merged: 2 commits, Aug 19, 2024
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/mod.rs
@@ -461,7 +461,7 @@ pub trait ChunkFilter<T: PolarsDataType> {
 /// Create a new ChunkedArray filled with values at that index.
 pub trait ChunkExpandAtIndex<T: PolarsDataType> {
     /// Create a new ChunkedArray filled with values at that index.
-    fn new_from_index(&self, length: usize, index: usize) -> ChunkedArray<T>;
+    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T>;
Collaborator (Author): index and length were flipped on the trait definition; see the sketch after this hunk.

 }

 macro_rules! impl_chunk_expand {
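
The comment above points at a subtle hazard: both parameters are `usize`, so a trait declaration whose parameter order disagrees with the convention used by the impls and call sites swaps the arguments' meaning without any compiler diagnostic. A minimal sketch of the failure class, using a hypothetical free function rather than the polars trait:

```rust
// Hypothetical sketch, not the polars API: two same-typed parameters give the
// compiler nothing to check, so a flipped declaration silently swaps meanings.
fn new_from_index(values: &[i32], index: usize, length: usize) -> Vec<i32> {
    // Repeat the value at `index`, `length` times.
    vec![values[index]; length]
}

fn main() {
    let values = [10, 20, 30];

    // Argument order per the fixed declaration: (index, length).
    let expanded = new_from_index(&values, 0, 100);
    assert_eq!(expanded.len(), 100);
    assert!(expanded.iter().all(|&v| v == 10));

    // A caller following the old, flipped declaration would write (100, 0)
    // here and hit an out-of-bounds panic at runtime, with no compile error.
    println!("expanded to {} rows", expanded.len());
}
```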
@@ -536,7 +536,7 @@ impl ChunkExpandAtIndex<ListType> for ListChunked {

 #[cfg(feature = "dtype-struct")]
 impl ChunkExpandAtIndex<StructType> for StructChunked {
-    fn new_from_index(&self, length: usize, index: usize) -> ChunkedArray<StructType> {
+    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<StructType> {
         let (chunk_idx, idx) = self.index_to_chunked_index(index);
         let chunk = self.downcast_chunks().get(chunk_idx).unwrap();
         let chunk = if chunk.is_null(idx) {
11 changes: 9 additions & 2 deletions crates/polars-core/src/frame/mod.rs
@@ -1171,8 +1171,15 @@ impl DataFrame {
 /// # Safety
 /// The caller must ensure `column.len() == self.height()`.
 pub unsafe fn with_column_unchecked(&mut self, column: Series) -> &mut Self {
-    self.get_columns_mut().push(column);
-    self
+    #[cfg(debug_assertions)]
+    {
+        return self.with_column(column).unwrap();
+    }
+    #[cfg(not(debug_assertions))]
+    {
+        self.get_columns_mut().push(column);
+        self
+    }
 }

Collaborator (Author): drive-by - enable checks here in debug mode to prevent similar mistakes; see the sketch after this hunk.

fn add_column_by_schema(&mut self, s: Series, schema: &Schema) -> PolarsResult<()> {
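
A minimal sketch of that drive-by pattern, assuming a toy frame type rather than the real `DataFrame`: the `unsafe` fast path skips validation in release builds, while debug builds enforce the documented safety contract so violations fail loudly during development instead of corrupting the frame.

```rust
// Hypothetical sketch of a debug-checked unsafe fast path; not the polars source.
struct MiniFrame {
    columns: Vec<Vec<i64>>,
    height: usize,
}

impl MiniFrame {
    /// # Safety
    /// The caller must ensure `column.len() == self.height`.
    unsafe fn push_column_unchecked(&mut self, column: Vec<i64>) {
        // Debug builds: validate the safety contract and fail loudly.
        #[cfg(debug_assertions)]
        {
            assert_eq!(column.len(), self.height, "column length mismatch");
        }
        // Release builds: trust the caller and skip the check.
        self.columns.push(column);
    }
}

fn main() {
    let mut df = MiniFrame { columns: Vec::new(), height: 3 };
    // The contract holds here, so the call is sound in both build modes.
    unsafe { df.push_column_unchecked(vec![1, 2, 3]) };
    println!("{} column(s) of height {}", df.columns.len(), df.height);
}
```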
@@ -149,7 +149,7 @@ impl SeriesTrait for SeriesWrap<StructChunked> {
     }

     fn new_from_index(&self, _index: usize, _length: usize) -> Series {
-        self.0.new_from_index(_length, _index).into_series()
+        self.0.new_from_index(_index, _length).into_series()
     }

     fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult<Series> {
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/read/read_impl.rs
@@ -1022,7 +1022,7 @@ impl BatchedParquetReader {

         // Re-use the same ChunkedArray
         if ca.len() < max_len {
-            *ca = ca.new_from_index(max_len, 0);
+            *ca = ca.new_from_index(0, max_len);
         }

         for df in &mut dfs {
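
A minimal sketch of the reuse pattern in this hunk, with a plain `Vec` standing in for the `ChunkedArray`: the broadcast column is regrown only when a batch is taller than anything seen so far, and each batch then takes a slice of exactly the height it needs.

```rust
// Hypothetical sketch of the grow-on-demand broadcast column; not the polars source.
fn main() {
    let file_path = "file.parquet";
    let mut cache: Vec<&str> = Vec::new();

    for batch_height in [4usize, 2, 8, 3] {
        if cache.len() < batch_height {
            // Counterpart of `ca.new_from_index(0, max_len)`: repeat the value
            // at index 0 until the cache covers the tallest batch seen so far.
            cache = vec![file_path; batch_height];
        }
        // Counterpart of slicing the cached column down to the batch height.
        let column = &cache[..batch_height];
        println!("batch of {batch_height} rows -> {} path entries", column.len());
    }
}
```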
19 changes: 2 additions & 17 deletions crates/polars-mem-engine/src/executors/scan/parquet.rs
@@ -187,15 +187,14 @@ impl ParquetExec {
         readers_and_metadata
             .into_par_iter()
             .zip(row_statistics.into_par_iter())
-            .enumerate()
             .map(
-                |(i, ((reader, _, predicate, projection), (cumulative_read, slice)))| {
+                |((reader, _, predicate, projection), (cumulative_read, slice))| {
                     let row_index = base_row_index.as_ref().map(|rc| RowIndex {
                         name: rc.name.clone(),
                         offset: rc.offset + cumulative_read as IdxSize,
                     });

-                    let mut df = reader
+                    let df = reader
                         .with_slice(Some(slice))
                         .with_row_index(row_index)
                         .with_predicate(predicate.clone())
@@ -210,20 +209,6 @@ impl ParquetExec {
                     )?
                     .finish()?;

Collaborator (Author): this was a duplicate of a very old implementation approach that didn't push include_file_paths into the actual reader; see the sketch after this hunk.

-                    if let Some(col) = &self.file_options.include_file_paths {
-                        let path = paths[i].to_str().unwrap();
-                        unsafe {
-                            df.with_column_unchecked(
-                                StringChunked::full(
-                                    col,
-                                    path,
-                                    std::cmp::max(df.height(), slice.1),
-                                )
-                                .into_series(),
-                            )
-                        };
-                    }
-
                     Ok(df)
                 },
             )
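
A minimal sketch of the failure mode this removal fixes, assuming a toy frame type rather than the polars API: once the reader itself materializes the file-path column, appending it again without a duplicate-name check leaves two columns named `path` in a single frame, which is exactly the duplicate-output bug in the title.

```rust
// Hypothetical sketch of the duplicate-column bug; not the polars source.
use std::collections::HashSet;

struct NamedFrame {
    names: Vec<String>,
}

impl NamedFrame {
    // Mirrors the stale post-processing step: push with no duplicate-name check.
    fn push_unchecked(&mut self, name: &str) {
        self.names.push(name.to_string());
    }

    fn has_duplicate_names(&self) -> bool {
        let mut seen = HashSet::new();
        self.names.iter().any(|n| !seen.insert(n.as_str()))
    }
}

fn main() {
    // The reader already honored include_file_paths, so `path` is present...
    let mut df = NamedFrame { names: vec!["x".into(), "path".into()] };
    // ...and the removed block then appended it a second time.
    df.push_unchecked("path");
    assert!(df.has_duplicate_names());
    println!("columns: {:?}", df.names);
}
```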
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/executors/sources/csv.rs
@@ -211,7 +211,7 @@ impl Source for CsvSource {

         if let Some(ca) = &mut self.include_file_path {
             if ca.len() < max_height {
-                *ca = ca.new_from_index(max_height, 0);
+                *ca = ca.new_from_index(0, max_height);
             };

             for data_chunk in &mut out {
12 changes: 4 additions & 8 deletions py-polars/tests/unit/io/test_scan.py
@@ -640,18 +640,14 @@ def test_scan_include_file_name(
     streaming: bool,
 ) -> None:
     tmp_path.mkdir(exist_ok=True)
-    paths: list[Path] = []
     dfs: list[pl.DataFrame] = []

     for x in ["1", "2"]:
-        paths.append(Path(f"{tmp_path}/{x}.bin").absolute())
-        dfs.append(pl.DataFrame({"x": x}))
-        write_func(dfs[-1], paths[-1])
-
-    df = pl.concat(dfs).with_columns(
-        pl.Series("path", map(str, paths), dtype=pl.String)
-    )
+        path = Path(f"{tmp_path}/{x}.bin").absolute()
+        dfs.append(pl.DataFrame({"x": 10 * [x]}).with_columns(path=pl.lit(str(path))))
+        write_func(dfs[-1].drop("path"), path)
+
+    df = pl.concat(dfs)
     assert df.columns == ["x", "path"]

     with pytest.raises(
Expand Down