-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e497d96
commit 26db838
Showing
22 changed files
with
624 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
from .helpers import split_data, save_confusion_matrix, load_data, get_class_distribution, plot_class_distribution, plot_results | ||
from .zipml import evaluate_model, optimize_hyperparameters, train_model,compare_models,save_model, load_model,predict, main | ||
from .zipml import * | ||
from .visualization import * | ||
from .utils import * | ||
from .model import * | ||
from .data import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .preprocessing import label_encode_labels, one_hot_encode_labels, split_data | ||
from .file_operations import walk_through_dir, unzip_data, read_lines_from_file, load_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import os | ||
import pandas as pd | ||
import zipfile | ||
import logging | ||
from typing import Optional, List | ||
|
||
# Configure the root logger when this module is imported: INFO level, with each
# record rendered as "<asctime> - <levelname> - <message>".
# NOTE(review): logging.basicConfig does nothing if the root logger already has
# handlers, so whichever module calls it first decides the effective settings.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | ||
|
||
def walk_through_dir(dir_path: str) -> pd.DataFrame:
    """
    Walks through dir_path returning its contents as a Pandas DataFrame.

    One row is produced for every directory that ``os.walk`` visits, starting
    with ``dir_path`` itself.

    Args:
        dir_path (str): Target directory.

    Returns:
        pd.DataFrame: A DataFrame with the columns ``dir_path`` (the visited
        directory), ``dir_names`` (list of its subdirectory names) and
        ``file_names`` (list of its file names). When ``dir_path`` is not an
        existing directory the DataFrame has those columns but no rows.
    """
    # os.walk ignores listing errors by default, so a mistyped path would
    # otherwise yield an empty DataFrame with no hint as to why.
    if not os.path.isdir(dir_path):
        logging.warning(
            "'%s' is not an existing directory; returning an empty DataFrame.",
            dir_path,
        )

    dir_paths = []   # path of each visited directory
    dir_names = []   # subdirectory names found in each visited directory
    file_names = []  # file names found in each visited directory

    for dirpath, dirnames, filenames in os.walk(dir_path):
        dir_paths.append(dirpath)
        dir_names.append(dirnames)
        file_names.append(filenames)

        # The message says "images", but every file is counted regardless of type.
        logging.info(
            "There are %d directories and %d images in '%s'.",
            len(dirnames),
            len(filenames),
            dirpath,
        )

    return pd.DataFrame({
        'dir_path': dir_paths,
        'dir_names': dir_names,
        'file_names': file_names,
    })
|
||
def unzip_data(filename: str, extract_path: Optional[str] = None) -> None:
    """
    Extracts every member of a zip archive.

    Args:
        filename (str): Path of the zip archive to extract.
        extract_path (Optional[str]): Directory that receives the extracted
            contents. When it is None (or an empty string), the current
            working directory is used.
    """
    # A missing/empty destination falls back to the current working directory.
    destination = extract_path or "."

    with zipfile.ZipFile(filename, "r") as archive:
        archive.extractall(destination)
|
||
def read_lines_from_file(filename: str, encoding: Optional[str] = None) -> List[str]:
    """
    Reads the contents of a text file and returns the lines as a list of strings.

    Args:
        filename (str): A string containing the path to the target text file.
        encoding (Optional[str]): Text encoding used to decode the file, e.g.
            "utf-8". The default, None, keeps the previous behaviour of using
            the platform's default encoding.

    Returns:
        List[str]: A list of strings, where each string represents a line from
        the file. Line terminators are kept, and an empty file yields an empty
        list.
    """
    # encoding=None is what open() uses when the argument is omitted, so
    # existing callers see no change.
    with open(filename, "r", encoding=encoding) as file:
        return file.readlines()
|
||
|
||
|
||
def load_data(file_path: str) -> pd.DataFrame:
    """
    Reads a CSV file into a pandas DataFrame.

    Parameters:
        file_path (str): Path to the CSV file.

    Returns:
        DataFrame: The parsed contents of the file, as produced by
        ``pd.read_csv`` with its default options.
    """
    # Log before reading so the path is recorded even if parsing fails.
    logging.info(f"Loading dataset from {file_path}.")
    dataset = pd.read_csv(file_path)
    return dataset
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .encoding import label_encode_labels, one_hot_encode_labels | ||
from .split_data import split_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from sklearn.calibration import LabelEncoder | ||
from sklearn.preprocessing import OneHotEncoder | ||
|
||
|
||
def one_hot_encode_labels(df: pd.DataFrame, column_name: str) -> np.ndarray:
    """
    One-hot encodes the values of a single DataFrame column.

    Args:
        df (pd.DataFrame): DataFrame containing the target column.
        column_name (str): Name of the column to be one-hot encoded.

    Returns:
        np.ndarray: One-hot encoded labels, one row per row of ``df``.
    """
    # The encoder expects a 2-D input, so the column is reshaped into a
    # single-feature matrix of shape (n_rows, 1).
    column_values = df[column_name].to_numpy()
    single_feature = column_values.reshape(-1, 1)

    # sparse_output=False requests a dense array instead of a sparse matrix.
    encoder = OneHotEncoder(sparse_output=False)
    return encoder.fit_transform(single_feature)
|
||
def label_encode_labels(df: pd.DataFrame, column_name: str) -> tuple:
    """
    Label encodes a DataFrame column and reports the classes that were found.

    Args:
        df (pd.DataFrame): DataFrame containing the target column.
        column_name (str): Name of the column to be label encoded.

    Returns:
        tuple: ``(encoded_labels, class_names)`` where ``encoded_labels`` is
        the encoder's output for the column and ``class_names`` is the fitted
        encoder's ``classes_`` attribute.
    """
    # NOTE(review): this file imports LabelEncoder from sklearn.calibration;
    # scikit-learn documents the class under sklearn.preprocessing, so the
    # import should probably point there.
    encoder = LabelEncoder()
    raw_labels = df[column_name].to_numpy()
    encoded = encoder.fit_transform(raw_labels)
    return encoded, encoder.classes_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
|
||
import logging | ||
import pandas as pd | ||
from typing import Any, Tuple, Union | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
# Configure the root logger when this module is imported: INFO level, with each
# record rendered as "<asctime> - <levelname> - <message>".
# NOTE(review): logging.basicConfig does nothing if the root logger already has
# handlers, so this call only takes effect when nothing configured logging earlier.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | ||
|
||
def split_data(
    X: Union[pd.DataFrame, Any],
    y: Any,
    test_size: float = 0.2,
    random_state: Union[int, None] = 42,
) -> Tuple[Union[pd.DataFrame, Any], Union[pd.DataFrame, Any], Any, Any]:
    """
    Splits data into training and testing sets.

    Parameters:
        X (DataFrame or array-like): Features.
        y (array-like): Target labels.
        test_size (float): Proportion of the dataset to include in the test split.
        random_state (int or None): Seed forwarded to ``train_test_split``. The
            default of 42 reproduces the previously hard-coded seed; pass a
            different int for another reproducible split, or None for a split
            that varies between calls.

    Returns:
        tuple: Split datasets (X_train, X_test, y_train, y_test)
    """
    logging.info("Splitting data with test size of %s.", test_size)
    return train_test_split(X, y, test_size=test_size, random_state=random_state)
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .analyze_model_predictions import analyze_model_predictions | ||
from .measure_prediction_time import measure_prediction_time | ||
from .calculate_model_results import calculate_model_results |
Oops, something went wrong.