MAINT Remove head method (#766)

Co-authored-by: ArturoAmorQ <arturo.amor-quiroz@polytechnique.edu>
ArturoAmorQ and ArturoAmorQ authored Apr 26, 2024
1 parent 913cc1c commit 09ad771
Showing 13 changed files with 29 additions and 20 deletions.
9 changes: 9 additions & 0 deletions python_scripts/01_tabular_data_exploration.py
@@ -70,6 +70,15 @@
# %%
adult_census.head()

# %% [markdown]
# An alternative is to omit the `head` method. This would output the initial
# and final rows and columns, but everything in between is not shown by
# default. It also provides the dataframe's dimensions at the bottom in the
# format `n_rows` x `n_columns`.

# %%
adult_census
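For reference, how much of the dataframe a bare `adult_census` renders depends on pandas display options; a minimal sketch, with illustrative option values that are not part of the notebook:

```python
import pandas as pd

adult_census = pd.read_csv("../datasets/adult-census.csv")

# The number of rows kept in the truncated repr is governed by pandas display
# options; with "display.min_rows" set to 4, only the first and last two rows
# are printed, followed by the "[n_rows rows x n_columns columns]" summary.
with pd.option_context("display.min_rows", 4, "display.max_rows", 10):
    print(adult_census)
```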

# %% [markdown]
# The column named **class** is our target variable (i.e., the variable which we
# want to predict). The two possible classes are `<=50K` (low-revenue) and
8 changes: 4 additions & 4 deletions python_scripts/02_numerical_pipeline_hands_on.py
@@ -34,7 +34,7 @@
adult_census = pd.read_csv("../datasets/adult-census.csv")
# drop the duplicated column `"education-num"` as stated in the first notebook
adult_census = adult_census.drop(columns="education-num")
adult_census.head()
adult_census

# %% [markdown]
# The next step separates the target from the data. We performed the same
@@ -44,7 +44,7 @@
data, target = adult_census.drop(columns="class"), adult_census["class"]

# %%
data.head()
data

# %%
target
@@ -95,7 +95,7 @@
# the `object` data type.

# %%
data.head()
data

# %% [markdown]
# We see that the `object` data type corresponds to columns containing strings.
@@ -105,7 +105,7 @@

# %%
numerical_columns = ["age", "capital-gain", "capital-loss", "hours-per-week"]
data[numerical_columns].head()
data[numerical_columns]
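An equivalent selection can also be derived from the dtypes themselves; a small sketch assuming pandas' `select_dtypes` (the notebook lists the columns by hand):

```python
import pandas as pd

adult_census = pd.read_csv("../datasets/adult-census.csv")
data = adult_census.drop(columns=["class", "education-num"])

# keep only the columns with a numerical dtype
numerical_data = data.select_dtypes(include="number")
print(numerical_data.columns.tolist())
```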

# %% [markdown]
# Now that we have limited the dataset to numerical columns only, we can analyse
4 changes: 2 additions & 2 deletions python_scripts/02_numerical_pipeline_introduction.py
@@ -39,7 +39,7 @@
# Let's have a look at the first records of this dataframe:

# %%
adult_census.head()
adult_census

# %% [markdown]
# We see that this CSV file contains all information: the target that we would
@@ -56,7 +56,7 @@

# %%
data = adult_census.drop(columns=[target_name])
data.head()
data

# %% [markdown]
# We can now focus on the variables, also known as features, that we later
4 changes: 2 additions & 2 deletions python_scripts/03_categorical_pipeline.py
@@ -81,7 +81,7 @@

# %%
data_categorical = data[categorical_columns]
data_categorical.head()
data_categorical

# %%
print(f"The dataset is composed of {data_categorical.shape[1]} features")
@@ -194,7 +194,7 @@

# %%
print(f"The dataset is composed of {data_categorical.shape[1]} features")
data_categorical.head()
data_categorical

# %%
data_encoded = encoder.fit_transform(data_categorical)
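The `encoder` used here is defined in a cell that falls outside this diff; a self-contained sketch of one-hot encoding with scikit-learn, using a toy stand-in for `data_categorical` and assuming a recent scikit-learn release (`sparse_output` keyword):

```python
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# tiny stand-in for `data_categorical`, for illustration only
data_categorical = pd.DataFrame(
    {"education": ["Bachelors", "HS-grad", "Bachelors"]}
)

encoder = OneHotEncoder(sparse_output=False)
data_encoded = encoder.fit_transform(data_categorical)

print(encoder.get_feature_names_out())  # one output column per category
print(data_encoded)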
2 changes: 1 addition & 1 deletion (file name not shown)
@@ -165,7 +165,7 @@
# method. As an example, we predict on the first five samples from the test set.

# %%
data_test.head()
data_test

# %%
model.predict(data_test)[:5]
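Note that this call computes predictions for the entire test set and then keeps the first five; a sketch of the variant that predicts on five rows only, reusing the notebook's `model` and `data_test`:

```python
# equivalent output, but only the first five test samples are scored
model.predict(data_test[:5])
```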
6 changes: 3 additions & 3 deletions python_scripts/cross_validation_train_test.py
@@ -41,15 +41,15 @@
print(housing.DESCR)

# %%
data.head()
data

# %% [markdown]
# To simplify future visualization, let's transform the prices from hundreds of
# thousands of dollars (100 k\$) to thousands of dollars (k\$).

# %%
target *= 100
target.head()
target
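The cell that loads the data is not part of this diff; a sketch of how `housing`, `data` and `target` are presumably obtained, assuming scikit-learn's California housing loader (whose target is expressed in units of 100 000 dollars):

```python
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing(as_frame=True)
data, target = housing.data, housing.target

# rescale the target from hundreds of thousands of dollars to k$
target *= 100
```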

# %% [markdown]
# ```{note}
@@ -218,7 +218,7 @@
import pandas as pd

cv_results = pd.DataFrame(cv_results)
cv_results.head()
cv_results
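The call producing `cv_results` is likewise elided from the diff; a sketch assuming scikit-learn's `cross_validate` with a placeholder regressor, reusing `data` and `target` from the sketch above:

```python
import pandas as pd
from sklearn.linear_model import LinearRegression  # placeholder model (assumption)
from sklearn.model_selection import cross_validate

cv_results = cross_validate(LinearRegression(), data, target, cv=5)

# one row per cross-validation split: fit_time, score_time and test_score
cv_results = pd.DataFrame(cv_results)
```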

# %% [markdown]
# ```{tip}
2 changes: 1 addition & 1 deletion python_scripts/linear_models_ex_02.py
@@ -52,7 +52,7 @@

data = penguins_non_missing[columns]
target = penguins_non_missing[target_name]
data.head()
data

# %% [markdown]
# Now it is your turn to train a linear regression model on this dataset. First,
2 changes: 1 addition & 1 deletion python_scripts/linear_models_sol_02.py
@@ -46,7 +46,7 @@

data = penguins_non_missing[columns]
target = penguins_non_missing[target_name]
data.head()
data

# %% [markdown]
# Now it is your turn to train a linear regression model on this dataset. First,
2 changes: 1 addition & 1 deletion python_scripts/linear_regression_without_sklearn.py
@@ -22,7 +22,7 @@
import pandas as pd

penguins = pd.read_csv("../datasets/penguins_regression.csv")
penguins.head()
penguins

# %% [markdown]
# We aim to solve the following problem: using the flipper length of a penguin,
4 changes: 2 additions & 2 deletions python_scripts/parameter_tuning_grid_search.py
@@ -36,7 +36,7 @@

# %%
data = adult_census.drop(columns=[target_name, "education-num"])
data.head()
data

# %% [markdown]
# Once the dataset is loaded, we split it into training and testing sets.
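The splitting cell itself is outside this diff; a typical call, with assumed parameter values, reusing the notebook's `data` and `target`:

```python
from sklearn.model_selection import train_test_split

data_train, data_test, target_train, target_test = train_test_split(
    data, target, random_state=42
)
```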
@@ -193,7 +193,7 @@
cv_results = pd.DataFrame(model_grid_search.cv_results_).sort_values(
"mean_test_score", ascending=False
)
cv_results.head()
cv_results

# %% [markdown]
# Let us focus on the most interesting columns and shorten the parameter names
2 changes: 1 addition & 1 deletion python_scripts/parameter_tuning_manual.py
@@ -38,7 +38,7 @@
# Our data is only numerical.

# %%
data.head()
data

# %% [markdown]
# Let's create a simple predictive model made of a scaler followed by a logistic
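The cell building this model is cut off by the diff; a minimal sketch of a scaler followed by a logistic regression with scikit-learn (the specific classes and their default parameters are assumptions):

```python
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# scale the numerical features, then classify with a logistic regression
model = make_pipeline(StandardScaler(), LogisticRegression())
```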
2 changes: 1 addition & 1 deletion python_scripts/parameter_tuning_randomized_search.py
@@ -44,7 +44,7 @@

# %%
data = adult_census.drop(columns=[target_name, "education-num"])
data.head()
data

# %% [markdown]
# Once the dataset is loaded, we split it into training and testing sets.
2 changes: 1 addition & 1 deletion python_scripts/trees_dataset.py
@@ -48,7 +48,7 @@
# Let's check the dataset in more detail.

# %%
penguins.head()
penguins

# %% [markdown]
# Since we have few samples, we can check a scatter plot to observe the