From ea21135fbb141ae103fb5fc960289b5601b468f2 Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Thu, 9 Mar 2023 19:16:31 -0800
Subject: [PATCH] Don't populate labels if label column is not specified in csv parser (#679)

* Don't populate labels if label column is not specified in csv parser

* simplify label handling
---
 src/data/csv_parser.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/data/csv_parser.h b/src/data/csv_parser.h
index 9b945a657f..026422577e 100644
--- a/src/data/csv_parser.h
+++ b/src/data/csv_parser.h
@@ -42,9 +42,8 @@ struct CSVParserParam : public Parameter<CSVParserParam> {
 
 /*!
  * \brief CSVParser, parses a dense csv format.
- * Currently is a dummy implementation, when label column is not specified.
  * All columns are treated as real dense data.
- * label will be assigned to 0.
+ * Label will be empty if the label column is not specified.
  *
  * This should be extended in future to accept arguments of column types.
  */
@@ -90,7 +89,6 @@ ParseBlock(const char *begin,
     const char* p = lbegin;
     int column_index = 0;
     IndexType idx = 0;
-    DType label = DType(0.0f);
     real_t weight = std::numeric_limits<real_t>::quiet_NaN();
 
     while (p != lend) {
@@ -111,7 +109,7 @@ ParseBlock(const char *begin,
       }
 
       if (column_index == param_.label_column) {
-        label = v;
+        out->label.push_back(v);
       } else if (std::is_same<DType, real_t>::value
                  && column_index == param_.weight_column) {
         weight = v;
@@ -136,13 +134,12 @@ ParseBlock(const char *begin,
     // skip empty line
     while ((*lend == '\n' || *lend == '\r') && lend != end) ++lend;
     lbegin = lend;
-    out->label.push_back(label);
     if (!std::isnan(weight)) {
       out->weight.push_back(weight);
     }
     out->offset.push_back(out->index.size());
   }
-  CHECK(out->label.size() + 1 == out->offset.size());
+  CHECK(out->label.size() == 0 || out->label.size() + 1 == out->offset.size());
   CHECK(out->weight.size() == 0 || out->weight.size() + 1 == out->offset.size());
 }
 }  // namespace data