Skip to content

Commit

Permalink
fix: Don't override CSV reader encoding with lossy UTF-8 (#16151)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored May 10, 2024
1 parent b2291b9 commit ebeddf9
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 17 deletions.
7 changes: 0 additions & 7 deletions crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,6 @@ impl CsvExec {
.with_columns(with_columns)
.with_rechunk(self.file_options.rechunk)
.with_row_index(self.file_options.row_index.clone())
- .map_parse_options(|parse_options| {
- parse_options.with_encoding(
- // TODO: We don't know why LossyUtf8 is set here, so remove it
- // to see if it breaks anything.
- CsvEncoding::LossyUtf8,
- )
- })
.with_path(Some(self.path.clone()))
.try_into_reader_with_file_path(None)?
._with_predicate(predicate)
Expand Down
11 changes: 1 addition & 10 deletions crates/polars-pipe/src/executors/sources/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ use std::path::PathBuf;

use polars_core::export::arrow::Either;
use polars_core::POOL;
- use polars_io::csv::read::{
- BatchedCsvReaderMmap, BatchedCsvReaderRead, CsvEncoding, CsvReadOptions, CsvReader,
- };
+ use polars_io::csv::read::{BatchedCsvReaderMmap, BatchedCsvReaderRead, CsvReadOptions, CsvReader};
use polars_plan::global::_set_n_rows_for_scan;
use polars_plan::prelude::FileScanOptions;
use polars_utils::iter::EnumerateIdxTrait;
Expand Down Expand Up @@ -68,13 +66,6 @@ impl CsvSource {
.with_columns(with_columns)
.with_rechunk(false)
.with_row_index(file_options.row_index)
- .map_parse_options(|parse_options| {
- parse_options.with_encoding(
- // TODO: We don't know why LossyUtf8 is set here, so remove it
- // to see if it breaks anything.
- CsvEncoding::LossyUtf8,
- )
- })
.with_path(Some(path))
.try_into_reader_with_file_path(None)?;

Expand Down

0 comments on commit ebeddf9

Please sign in to comment.