MemoryError: Unable to allocate 29.3 GiB for an array with shape (2211861,) and data type <U3551 #607
rohankarande2023 asked this question in Q&A (unanswered)
I'm getting a MemoryError while creating a character-level tokenizer for the PubMed_200k_RCT_numbers_replaced_with_at_sign NLP project.
```python
import tensorflow as tf

# Create the character-level tokenizer
char_vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=Num_Char_Tokens, output_sequence_length=char_per_sentence,
    name="char_vectorizer")

# Adapt the character vectorizer to the training characters
char_vectorizer.adapt(train_chars)
```
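For scale, the dtype `<U3551` in the error means NumPy stores every entry as a fixed-width unicode string of 3,551 characters (the length of the longest sequence in `train_chars`) at 4 bytes per character, no matter how short the actual string is. A quick back-of-the-envelope check (my own arithmetic, using only the shape and dtype from the error message) reproduces the 29.3 GiB figure:

```python
# Each <U3551 entry occupies 3551 UCS-4 code points (4 bytes each),
# padded to that width regardless of the real string length.
n_strings = 2_211_861        # array shape from the error message
chars_per_entry = 3_551      # width from dtype <U3551
total_bytes = n_strings * chars_per_entry * 4
print(total_bytes / 2**30)   # -> ~29.26, i.e. the 29.3 GiB being requested
```

The full traceback: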
```
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Cell In[256], line 3
      1 # Adapt character vectorizer to training characters
----> 3 char_vectorizer.adapt(train_chars).batch(32)

File ~\anaconda3\lib\site-packages\keras\src\layers\preprocessing\text_vectorization.py:473, in TextVectorization.adapt(self, data, batch_size, steps)
    423 def adapt(self, data, batch_size=None, steps=None):
    424     """Computes a vocabulary of string terms from tokens in a dataset.
    425
    426     Calling `adapt()` on a `TextVectorization` layer is an alternative to
   (...)
    471     argument is not supported with array inputs.
    472     """
--> 473     super().adapt(data, batch_size=batch_size, steps=steps)

File ~\anaconda3\lib\site-packages\keras\src\engine\base_preprocessing_layer.py:246, in PreprocessingLayer.adapt(self, data, batch_size, steps)
    244 if self.built:
    245     self.reset_state()
--> 246 data_handler = data_adapter.DataHandler(
    247     data,
    248     batch_size=batch_size,
    249     steps_per_epoch=steps,
    250     epochs=1,
    251     steps_per_execution=self._steps_per_execution,
    252     distribute=False,
    253 )
    254 self._adapt_function = self.make_adapt_function()
    255 for _, iterator in data_handler.enumerate_epochs():

File ~\anaconda3\lib\site-packages\keras\src\engine\data_adapter.py:1285, in DataHandler.__init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution, distribute, pss_evaluation_shards)
   1282 self._steps_per_execution = steps_per_execution
   1284 adapter_cls = select_data_adapter(x, y)
-> 1285 self._adapter = adapter_cls(
   1286     x,
   1287     y,
   1288     batch_size=batch_size,
   1289     steps=steps_per_epoch,
   1290     epochs=epochs - initial_epoch,
   1291     sample_weights=sample_weight,
   1292     shuffle=shuffle,
   1293     max_queue_size=max_queue_size,
   1294     workers=workers,
   1295     use_multiprocessing=use_multiprocessing,
   1296     distribution_strategy=tf.distribute.get_strategy(),
   1297     model=model,
   1298     pss_evaluation_shards=pss_evaluation_shards,
   1299 )
   1301 strategy = tf.distribute.get_strategy()
   1303 self._current_step = 0

File ~\anaconda3\lib\site-packages\keras\src\engine\data_adapter.py:714, in ListsOfScalarsDataAdapter.__init__(self, x, y, sample_weights, sample_weight_modes, batch_size, shuffle, **kwargs)
    703 def __init__(
    704     self,
    705     x,
   (...)
    711     **kwargs,
    712 ):
    713     super().__init__(x, y, **kwargs)
--> 714     x = np.asarray(x)
    715     if y is not None:
    716         y = np.asarray(y)

MemoryError: Unable to allocate 29.3 GiB for an array with shape (2211861,) and data type <U3551
```
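The allocation happens in `ListsOfScalarsDataAdapter.__init__`, where `np.asarray(x)` tries to turn the whole list of training strings into one fixed-width unicode array in a single shot. One way around it (a minimal sketch of what I believe should work, assuming `train_chars` is a plain Python list of strings and `Num_Char_Tokens`/`char_per_sentence` are defined as above) is to hand `adapt()` a batched `tf.data.Dataset` instead of the raw list, so the data is streamed in small batches of variable-length `tf.string` tensors rather than materialized as one padded NumPy array:

```python
import tensorflow as tf

# Stream the strings through tf.data so adapt() never needs the
# 29.3 GiB fixed-width NumPy array; tf.string tensors are stored
# at their actual byte length, not padded to the longest string.
char_dataset = tf.data.Dataset.from_tensor_slices(train_chars).batch(32)

char_vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=Num_Char_Tokens,
    output_sequence_length=char_per_sentence,
    name="char_vectorizer")
char_vectorizer.adapt(char_dataset)
```

Note also that the `.batch(32)` chained onto `adapt(...)` in the traceback wouldn't have worked even without the MemoryError: `adapt()` returns `None`, so batching has to happen on the dataset before it is passed in, as above.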