Skip to content

Commit

Permalink
Don't populate labels if label column is not specified in csv parser (#679)
Browse files Browse the repository at this point in the history

* Don't populate labels if label column is not specified in csv parser

* Simplify label handling
  • Loading branch information
rongou authored Mar 10, 2023
1 parent 81db539 commit ea21135
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions src/data/csv_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ struct CSVParserParam : public Parameter<CSVParserParam> {

/*!
* \brief CSVParser, parses a dense csv format.
* Currently is a dummy implementation, when label column is not specified.
* All columns are treated as real dense data.
* label will be assigned to 0.
* Label will be empty if the label column is not specified.
*
* This should be extended in future to accept arguments of column types.
*/
Expand Down Expand Up @@ -90,7 +89,6 @@ ParseBlock(const char *begin,
const char* p = lbegin;
int column_index = 0;
IndexType idx = 0;
DType label = DType(0.0f);
real_t weight = std::numeric_limits<real_t>::quiet_NaN();

while (p != lend) {
Expand All @@ -111,7 +109,7 @@ ParseBlock(const char *begin,
}

if (column_index == param_.label_column) {
label = v;
out->label.push_back(v);
} else if (std::is_same<DType, real_t>::value
&& column_index == param_.weight_column) {
weight = v;
Expand All @@ -136,13 +134,12 @@ ParseBlock(const char *begin,
// skip empty line
while ((*lend == '\n' || *lend == '\r') && lend != end) ++lend;
lbegin = lend;
out->label.push_back(label);
if (!std::isnan(weight)) {
out->weight.push_back(weight);
}
out->offset.push_back(out->index.size());
}
CHECK(out->label.size() + 1 == out->offset.size());
CHECK(out->label.size() == 0 || out->label.size() + 1 == out->offset.size());
CHECK(out->weight.size() == 0 || out->weight.size() + 1 == out->offset.size());
}
} // namespace data
Expand Down

0 comments on commit ea21135

Please sign in to comment.