From 94fc939fd4d0a7e49eb34e854648bf34f3a63c6c Mon Sep 17 00:00:00 2001 From: Paulo Hernane Date: Tue, 1 Oct 2024 20:09:24 +0000 Subject: [PATCH] Add categorical variables section to data pre-processing in machine learning course --- .../categorical-variables.md | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 my-brain/machine-learning-and-data-science-course/data-pre-processing/categorical-variables.md diff --git a/my-brain/machine-learning-and-data-science-course/data-pre-processing/categorical-variables.md b/my-brain/machine-learning-and-data-science-course/data-pre-processing/categorical-variables.md new file mode 100644 index 00000000..75543797 --- /dev/null +++ b/my-brain/machine-learning-and-data-science-course/data-pre-processing/categorical-variables.md @@ -0,0 +1,40 @@ +--- +id: "categorical-variables" +title: "Categorical Variables" +--- + +To use categorical variables in a model, we need to transform them into numerical values. This process is known as encoding or feature encoding. + +### Label Encoding + +Label encoding converts a categorical variable into numerical values by assigning a unique integer to each category. Because the integers imply an order between categories, it is best suited to ordinal variables or target labels. + +```python +from sklearn.preprocessing import LabelEncoder + +# Create a label encoder object +label_encoder = LabelEncoder() + +# Fit the encoder to the data +data['category'] = label_encoder.fit_transform(data['category']) +``` + +### One-Hot Encoding + +One-hot encoding converts a categorical variable into binary vectors by creating a new binary column for each category in the variable.
+ +```python +from sklearn.preprocessing import OneHotEncoder +from sklearn.compose import ColumnTransformer + +# Create a column transformer object +column_transformer = ColumnTransformer( + transformers=[ + ('encoder', OneHotEncoder(), ['category', 'category2']) + ], + remainder='passthrough' # Keep the remaining columns +) + +# Fit the transformer to the data +data = column_transformer.fit_transform(data) +```