You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
Expected Behavior
The train_test_data_setup() function performs stratification correctly,
or the fit() method accepts a stratify parameter to enable/disable it.
Current Behavior
Fedot cannot stratify the data and fails to fit on it.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/fedot/api/api_utils/assumptions/assumptions_handler.py:67, in AssumptionsHandler.fit_assumption_and_check_correctness(self, pipeline, pipelines_cache, preprocessing_cache, eval_n_jobs)
66 try:
---> 67 data_train, data_test = train_test_data_setup(self.data)
68 self.log.info('Initial pipeline fitting started')
File /opt/conda/lib/python3.10/site-packages/fedot/core/data/data_split.py:199, in train_test_data_setup(data, split_ratio, shuffle, shuffle_flag, stratify, random_seed, validation_blocks)
198 split_func = split_func_dict[data.data_type]
--> 199 train_data, test_data = split_func(data, **input_arguments)
200 elif isinstance(data, MultiModalData):
File /opt/conda/lib/python3.10/site-packages/fedot/core/data/data_split.py:101, in _split_any(data, split_ratio, shuffle, stratify, random_seed, **kwargs)
99 stratify_labels = data.target if stratify else None
--> 101 train_ids, test_ids = train_test_split(np.arange(0, len(data.target)),
102 test_size=1. - split_ratio,
103 shuffle=shuffle,
104 random_state=random_seed,
105 stratify=stratify_labels)
107 train_data = _split_input_data_by_indexes(data, index=train_ids)
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2583, in train_test_split(test_size, train_size, random_state, shuffle, stratify, *arrays)
2581 cv = CVClass(test_size=n_test, train_size=n_train, random_state=random_state)
-> 2583 train, test = next(cv.split(X=arrays[0], y=stratify))
2585 return list(
2586 chain.from_iterable(
2587 (_safe_indexing(a, train), _safe_indexing(a, test)) for a in arrays
2588 )
2589 )
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_split.py:1689, in BaseShuffleSplit.split(self, X, y, groups)
1688 X, y, groups = indexable(X, y, groups)
-> 1689 for train, test in self._iter_indices(X, y, groups):
1690 yield train, test
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_split.py:2078, in StratifiedShuffleSplit._iter_indices(self, X, y, groups)
2077 if np.min(class_counts) < 2:
-> 2078 raise ValueError(
2079 "The least populated class in y has only 1"
2080 " member, which is too few. The minimum"
2081 " number of groups for any class cannot"
2082 " be less than 2."
2083 )
2085 if n_train < n_classes:
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[61], line 1
----> 1 auto_model.fit(features=train)
File /opt/conda/lib/python3.10/site-packages/fedot/api/main.py:176, in Fedot.fit(self, features, target, predefined_model)
172 self.current_pipeline = PredefinedModel(predefined_model, self.train_data, self.log,
173 use_input_preprocessing=self.params.get(
174 'use_input_preprocessing')).fit()
175 else:
--> 176 self.current_pipeline, self.best_models, self.history = self.api_composer.obtain_model(self.train_data)
178 if self.current_pipeline is None:
179 raise ValueError('No models were found')
File /opt/conda/lib/python3.10/site-packages/fedot/api/api_utils/api_composer.py:63, in ApiComposer.obtain_model(self, train_data)
59 with_tuning = self.params.get('with_tuning')
61 self.timer = ApiTime(time_for_automl=timeout, with_tuning=with_tuning)
---> 63 initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data)
65 multi_objective = len(self.metrics) > 1
66 self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective)
File /opt/conda/lib/python3.10/site-packages/fedot/api/api_utils/api_composer.py:107, in ApiComposer.propose_and_fit_initial_assumption(self, train_data)
100 initial_assumption = assumption_handler.propose_assumptions(self.params.get('initial_assumption'),
101 available_operations,
102 use_input_preprocessing=self.params.get(
103 'use_input_preprocessing'))
105 with self.timer.launch_assumption_fit():
106 fitted_assumption = \
--> 107 assumption_handler.fit_assumption_and_check_correctness(deepcopy(initial_assumption[0]),
108 pipelines_cache=self.pipelines_cache,
109 preprocessing_cache=self.preprocessing_cache,
110 eval_n_jobs=self.params.n_jobs)
112 self.log.message(
113 f'Initial pipeline was fitted in {round(self.timer.assumption_fit_spend_time.total_seconds(), 1)} sec.')
115 self.params.update(preset=assumption_handler.propose_preset(preset, self.timer, n_jobs=self.params.n_jobs))
File /opt/conda/lib/python3.10/site-packages/fedot/api/api_utils/assumptions/assumptions_handler.py:86, in AssumptionsHandler.fit_assumption_and_check_correctness(self, pipeline, pipelines_cache, preprocessing_cache, eval_n_jobs)
81 MemoryAnalytics.log(self.log,
82 additional_info='fitting of the initial pipeline',
83 logging_level=45) # message logging level
85 except Exception as ex:
---> 86 self._raise_evaluating_exception(ex)
87 return pipeline
File /opt/conda/lib/python3.10/site-packages/fedot/api/api_utils/assumptions/assumptions_handler.py:94, in AssumptionsHandler._raise_evaluating_exception(self, ex)
92 self.log.info(fit_failed_info)
93 print(traceback.format_exc())
---> 94 raise ValueError(advice_info)
ValueError: Initial pipeline fit was failed due to: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.. Check pipeline structure and the correctness of the data
Possible Solution
A possible solution can be found in Fedot.Industrial.
Stratification fails in the fit() function: ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
Expected Behavior
The train_test_data_setup() function performs stratification correctly, or the fit() method accepts a stratify parameter to enable/disable it.
Current Behavior
Fedot cannot stratify the data and fails to fit on it.
Possible Solution
A possible solution can be found in Fedot.Industrial.
Steps to Reproduce
Live example - https://www.kaggle.com/code/eliyahusanti/steel-plates-faults
Context [OPTIONAL]
Participating in a Kaggle Playground Series S4E3 competition.
The text was updated successfully, but these errors were encountered: