diff --git a/common/helpers/colour_estimation/hair_colours/beeline honey.jpg b/common/helpers/colour_estimation/hair_colours/beeline honey.jpg new file mode 100644 index 000000000..f83d18886 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/beeline honey.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/blowout burgundy.jpg b/common/helpers/colour_estimation/hair_colours/blowout burgundy.jpg new file mode 100644 index 000000000..e29189524 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/blowout burgundy.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/butter scotch.jpg b/common/helpers/colour_estimation/hair_colours/butter scotch.jpg new file mode 100644 index 000000000..090d91ad1 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/butter scotch.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/caramel.jpg b/common/helpers/colour_estimation/hair_colours/caramel.jpg new file mode 100644 index 000000000..1a26d62c6 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/caramel.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/chocolate brown.jpg b/common/helpers/colour_estimation/hair_colours/chocolate brown.jpg new file mode 100644 index 000000000..7db8c90c9 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/chocolate brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/chocolate-cherry.jpg b/common/helpers/colour_estimation/hair_colours/chocolate-cherry.jpg new file mode 100644 index 000000000..b7b9b2816 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/chocolate-cherry.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/copper-shimmer.jpg b/common/helpers/colour_estimation/hair_colours/copper-shimmer.jpg new file mode 100644 index 000000000..4ef7a5062 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/copper-shimmer.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/crushedgarnet.jpg b/common/helpers/colour_estimation/hair_colours/crushedgarnet.jpg new file mode 100644 index 000000000..3723ce8cf Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/crushedgarnet.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/dark golden brown.jpg b/common/helpers/colour_estimation/hair_colours/dark golden brown.jpg new file mode 100644 index 000000000..8005d3391 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/dark golden brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/espresso.jpg b/common/helpers/colour_estimation/hair_colours/espresso.jpg new file mode 100644 index 000000000..6ef551fbc Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/espresso.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/expresso.jpg b/common/helpers/colour_estimation/hair_colours/expresso.jpg new file mode 100644 index 000000000..7c64850a6 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/expresso.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/french-roast.jpg b/common/helpers/colour_estimation/hair_colours/french-roast.jpg new file mode 100644 index 000000000..ac3368aa8 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/french-roast.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/gray.png 
b/common/helpers/colour_estimation/hair_colours/gray.png new file mode 100644 index 000000000..97b47262a Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/gray.png differ diff --git a/common/helpers/colour_estimation/hair_colours/havanabrown.jpg b/common/helpers/colour_estimation/hair_colours/havanabrown.jpg new file mode 100644 index 000000000..277a2b9a7 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/havanabrown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/hot-toffee.jpg b/common/helpers/colour_estimation/hair_colours/hot-toffee.jpg new file mode 100644 index 000000000..dcfdebfa0 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/hot-toffee.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/jet black.jpg b/common/helpers/colour_estimation/hair_colours/jet black.jpg new file mode 100644 index 000000000..2a56c66e4 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/jet black.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/leatherblack.jpg b/common/helpers/colour_estimation/hair_colours/leatherblack.jpg new file mode 100644 index 000000000..3eb7f04c0 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/leatherblack.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light ash blonde.jpg b/common/helpers/colour_estimation/hair_colours/light ash blonde.jpg new file mode 100644 index 000000000..41882cdbd Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light ash blonde.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light auburn.jpg b/common/helpers/colour_estimation/hair_colours/light auburn.jpg new file mode 100644 index 000000000..7dd3c79f0 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light auburn.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light blonde.jpg b/common/helpers/colour_estimation/hair_colours/light blonde.jpg new file mode 100644 index 000000000..bb8fe425d Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light blonde.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light brown.jpg b/common/helpers/colour_estimation/hair_colours/light brown.jpg new file mode 100644 index 000000000..3ae276b24 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light cool brown.jpg b/common/helpers/colour_estimation/hair_colours/light cool brown.jpg new file mode 100644 index 000000000..3c59c7ace Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light cool brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light golden blonde.jpg b/common/helpers/colour_estimation/hair_colours/light golden blonde.jpg new file mode 100644 index 000000000..f02c77e39 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light golden blonde.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/light golden brown.jpg b/common/helpers/colour_estimation/hair_colours/light golden brown.jpg new file mode 100644 index 000000000..6592a4f7d Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/light golden brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/medium ash brown.jpg b/common/helpers/colour_estimation/hair_colours/medium ash brown.jpg 
new file mode 100644 index 000000000..1cfb60d55 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/medium ash brown.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/medium auburn.jpg b/common/helpers/colour_estimation/hair_colours/medium auburn.jpg new file mode 100644 index 000000000..cef69b8b6 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/medium auburn.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/medium champagne.jpg b/common/helpers/colour_estimation/hair_colours/medium champagne.jpg new file mode 100644 index 000000000..5c10df037 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/medium champagne.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/midnighrruby.jpg b/common/helpers/colour_estimation/hair_colours/midnighrruby.jpg new file mode 100644 index 000000000..c057e82bc Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/midnighrruby.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/pure-diamond.jpg b/common/helpers/colour_estimation/hair_colours/pure-diamond.jpg new file mode 100644 index 000000000..80da260a6 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/pure-diamond.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/red hot cinnamon.jpg b/common/helpers/colour_estimation/hair_colours/red hot cinnamon.jpg new file mode 100644 index 000000000..3b5185bbe Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/red hot cinnamon.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/reddish blonde.jpg b/common/helpers/colour_estimation/hair_colours/reddish blonde.jpg new file mode 100644 index 000000000..8873b6142 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/reddish blonde.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/rubyfusion.jpg b/common/helpers/colour_estimation/hair_colours/rubyfusion.jpg new file mode 100644 index 000000000..4c2174cc2 Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/rubyfusion.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/sparkling-amber.jpg b/common/helpers/colour_estimation/hair_colours/sparkling-amber.jpg new file mode 100644 index 000000000..e36223b9c Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/sparkling-amber.jpg differ diff --git a/common/helpers/colour_estimation/hair_colours/sunflower-blonde.jpg b/common/helpers/colour_estimation/hair_colours/sunflower-blonde.jpg new file mode 100644 index 000000000..48b0181bf Binary files /dev/null and b/common/helpers/colour_estimation/hair_colours/sunflower-blonde.jpg differ diff --git a/common/helpers/colour_estimation/src/colour_estimation/__init__.py b/common/helpers/colour_estimation/src/colour_estimation/__init__.py index 54779f81d..66d308c9d 100644 --- a/common/helpers/colour_estimation/src/colour_estimation/__init__.py +++ b/common/helpers/colour_estimation/src/colour_estimation/__init__.py @@ -1,6 +1,10 @@ -from .rgb import RGB_COLOURS, RGB_HAIR_COLOURS - import numpy as np +import os +import cv2 +# import torch +# from scipy.ndimage import convolve + +from .rgb import * def closest_colours(requested_colour, colours): @@ -18,3 +22,326 @@ def closest_colours(requested_colour, colours): for color_name, distance in top_three_colors] return formatted_colors + + +# def avg_color_float(rgb_image: torch.Tensor, mask: 
torch.Tensor) -> torch.Tensor: +# mask = mask.bool() +# avg_colors = torch.zeros((rgb_image.size(0), mask.size(1), rgb_image.size(1)), device=rgb_image.device) +# for i in range(rgb_image.size(0)): +# for j in range(mask.size(1)): +# for k in range(rgb_image.size(1)): +# valid_pixels = torch.masked_select(rgb_image[i, k], mask[i, j]) +# avg_color = valid_pixels.float().mean() if valid_pixels.numel() > 0 else torch.tensor(0.0) +# avg_colors[i, j, k] = avg_color + +# return avg_colors # / 255.0 + + +# def median_color_float(rgb_image: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: +# mask = mask.bool() +# median_colors = torch.zeros((rgb_image.size(0), mask.size(1), rgb_image.size(1)), device=rgb_image.device) +# for i in range(rgb_image.size(0)): +# for j in range(mask.size(1)): +# for k in range(rgb_image.size(1)): +# valid_pixels = torch.masked_select(rgb_image[i, k], mask[i, j]) +# if valid_pixels.numel() > 0: +# median_value = valid_pixels.median() +# else: +# median_value = torch.tensor(0.0, device=rgb_image.device) +# median_colors[i, j, k] = median_value +# return median_colors # / 255.0 + + +# def plot_with_matplotlib(frame, categories, masks, predictions, colours): +# """Generate an image with matplotlib, showing the original frame and masks with titles and color overlays.""" +# assert len(masks) == len(categories) == len(predictions), "Length of masks, categories, and predictions must match." + +# num_masks = len(masks) +# cols = 3 +# rows = (num_masks + 1) // cols + ((num_masks + 1) % cols > 0) # Adding 1 for the frame +# position = range(1, num_masks + 2) # +2 to include the frame in the count + +# fig = plt.figure(figsize=(15, rows * 3)) # Adjust the size as needed + +# # Add the frame as the first image +# ax = fig.add_subplot(rows, cols, 1) +# # frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) +# ax.imshow(frame) +# ax.set_title('Original Frame') +# ax.axis('off') + +# # Iterate over the masks +# for i, idx in enumerate(position[1:], start=1): # Skip 1 for the frame +# ax = fig.add_subplot(rows, cols, idx) + +# # Create an RGB image for the colored mask +# colored_mask = np.stack([masks[i-1]]*3, axis=-1) # i-1 because we skip the frame in position + +# # Apply color if category is detected and color is provided +# if predictions[i-1]: +# if (i-1) < len(colours): +# color = np.array(colours[i-1], dtype=np.uint8) # Convert color to uint8 +# color_mask = np.zeros_like(colored_mask) # Initialize color_mask with the same shape as colored_mask +# color_mask[..., 0] = masks[i-1] * color[0] # Apply color channel 0 +# color_mask[..., 1] = masks[i-1] * color[1] # Apply color channel 1 +# color_mask[..., 2] = masks[i-1] * color[2] # Apply color channel 2 +# # Now combine the colored mask with the original grayscale mask +# colored_mask = np.where(masks[i-1][:, :, None], color_mask, colored_mask).astype(np.uint8) +# # Show the colored mask +# ax.imshow(colored_mask) +# # print(np.max(mask_image)) +# # mask_image = masks[i-1] +# # ax.imshow(mask_image, cmap="gray") +# else: +# # If there's no color provided for this category, use white color +# mask_image = masks[i-1] +# ax.imshow(mask_image, cmap="gray") +# else: +# # If the category is not detected, keep the mask black +# mask_image = masks[i-1] +# ax.imshow(mask_image, cmap="gray") + + +# # mask_image = masks[i-1] +# # ax.imshow(mask_image, cmap="gray") + +# # Set title with the detection status +# detection_status = 'yes' if predictions[i-1] else 'no' +# ax.set_title(f"{categories[i-1]} - {detection_status}") +# ax.axis('off') + 
+# plt.tight_layout() +# fig.canvas.draw() + +# # Retrieve buffer and close the plot to avoid memory issues +# data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) +# data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) +# plt.close(fig) + +# return data + + +def count_colours_in_masked_area(img, mask, colours, filter_size=3, sort=False): + """ + Counts the number of pixels of each color within the masked area of an image. + + Parameters: + img (numpy.ndarray): An RGB image, with the shape (height, width, 3). + mask (numpy.ndarray): A binary mask, with the shape (height, width), where 1 indicates the area of interest. + colours (dict): A dictionary where keys are color names and values are the corresponding RGB values. + filter_size (int): The size of the convolution filter to apply for smoothing the image, default is 3. + sort (bool): Whether to return a sorted list of colors based on pixel count, default is False. + + Returns: + dict: A dictionary containing the count of pixels for each color in the masked area. + If sort is True, it also returns a list of tuples, each containing a color name, its proportion in the masked area, and the pixel count. This list is sorted in descending order based on pixel count. + + The function first applies an averaging filter to the image for smoothing. Then, it calculates the Euclidean distance of each pixel in the masked area to the predefined colors. It identifies the closest color for each pixel, counts the occurrences of each color, and creates a dictionary mapping colors to their respective counts. If sorting is requested, it also calculates the proportion of each color and returns a sorted list of colors based on their pixel count. + """ + avg_filter = np.ones((filter_size, filter_size, 3)) / (filter_size ** 2) + img_filtered = img + # img_filtered = convolve(img, avg_filter, mode='constant', cval=0.0) + colours_array = np.array(list(colours.values())) + masked_img = img_filtered[mask == 1] + distances = np.linalg.norm(masked_img[:, None] - colours_array, axis=2) + closest_colours = np.argmin(distances, axis=1) + unique, counts = np.unique(closest_colours, return_counts=True) + colour_counts = {list(colours.keys())[i]: count for i, count in zip(unique, counts)} + if sort: + total_pixels = sum(counts) + sorted_colours = sorted(((list(colours.keys())[i], count / total_pixels, count) + for i, count in zip(unique, counts)), key=lambda item: item[2], reverse=True) + return colour_counts, sorted_colours + + return colour_counts + + +def average_colours_by_label(labels, colours): + """ + Computes the average values of colours associated with each label. + + Parameters: + labels (dict): A dictionary where keys are label names and values are lists of binary values (0 or 1). Each list represents whether a certain feature (labelled by the key) is present (1) or not (0) in a set of instances. + colours (dict): A dictionary where keys are label names and values are dictionaries. Each inner dictionary maps colour names to lists of values (e.g., pixel counts or intensities) associated with that colour for each instance. + + Returns: + dict: A dictionary where keys are label names and values are sorted lists of tuples. Each tuple contains a colour name and its average value calculated only from instances where the label is present (1). The tuples are sorted by average values in descending order. 
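+    For example (illustrative values): average_colours_by_label({'hair': [1, 0, 1]}, {'hair': {'brown': [60, 10, 70], 'black': [40, 20, 30]}}) returns {'hair': [('brown', 65.0), ('black', 35.0)]}, since only the first and third instances (label value 1) contribute to each average.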
+ + The function iterates through each label, calculating the average value for each colour only from instances where the label value is 1 (present). It then sorts these average values in descending order for each label and returns this sorted list along with the label name in a dictionary. + """ + averaged_colours = {} + + for label, label_values in labels.items(): + if label not in colours.keys(): + continue + + colour_values = colours[label] + averages = {} + + for colour, values in colour_values.items(): + valid_values = [value for value, label_value in zip(values, label_values) if label_value == 1] + if valid_values: + averages[colour] = sum(valid_values) / len(valid_values) + + sorted_colours = sorted(averages.items(), key=lambda item: item[1], reverse=True) + averaged_colours[label] = sorted_colours + + return averaged_colours + + +def load_images_to_dict(root_dir): + """ + Load images from a specified directory into a dictionary, removing file extensions from the keys. + + Parameters: + root_dir (str): The root directory containing the images. + + Returns: + dict: A dictionary with image names (without extensions) as keys and their corresponding numpy arrays as values. + """ + image_dict = {} + for filename in os.listdir(root_dir): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + img_path = os.path.join(root_dir, filename) + # Read the image using OpenCV + img = cv2.imread(img_path) + # Convert it from BGR to RGB color space + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + # Remove the file extension from the filename + name_without_extension = os.path.splitext(filename)[0] + image_dict[name_without_extension] = img + + return image_dict + + +def generate_colour_table(image_dict: dict, colour_map: dict): + """ + Generates a colour table for each image in the given dictionary, counting the colours in each image. + + Parameters: + image_dict (dict): A dictionary where keys are image identifiers and values are image arrays in the format (height, width, 3). + colour_map (dict): A dictionary mapping colour names to their respective RGB values. + + Returns: + dict: A dictionary where keys are image identifiers and values are colour tables. Each colour table is generated by the 'count_colours_in_masked_area' function and contains a count of how many times each colour (as defined in colour_map) appears in the corresponding image. + + For each image in the image_dict, this function creates a mask that covers the entire image and uses 'count_colours_in_masked_area' to count the occurrences of each colour in the colour_map within the image. The results are stored in a new dictionary, mapping each image identifier to its corresponding colour table. + """ + colour_table = {} + for k in image_dict.keys(): + colour_table[k] = count_colours_in_masked_area(image_dict[k], np.ones((image_dict[k].shape[0], image_dict[k].shape[1])), colour_map, sort=True) + return colour_table + + +def compare_colour_distributions(averaged_colours_list, colour_table_dict): + """ + Compares colour distributions between an averaged colours list and a dictionary of colour tables by calculating the Euclidean distance. + + Parameters: + averaged_colours_list (list): A list of tuples, where each tuple contains a colour name and its proportion. This is typically the output from 'average_colours_by_label'. + colour_table_dict (dict): A dictionary where keys are image identifiers and values are colour tables. 
Each colour table is a list of tuples, each containing a colour name, its proportion in the image, and the pixel count. + + Returns: + dict: A dictionary where keys are image identifiers and values are the Euclidean distances between the colour distribution in the image and the averaged_colours_list. + + The function iterates over each image's colour table in colour_table_dict. For each image, it calculates the Euclidean distance between the colour proportions in averaged_colours_list and the colour proportions in the image's colour table. The results are stored in a dictionary, mapping each image identifier to the calculated distance. + """ + distances = {} + + avg_colours_dict = {colour: proportion for colour, proportion in averaged_colours_list} + + for image_name, colour_data in colour_table_dict.items(): + colour_proportions = {colour: proportion for colour, proportion, _ in colour_data[1]} + + common_colours = set(avg_colours_dict.keys()) & set(colour_proportions.keys()) + avg_values = [avg_colours_dict.get(colour, 0) for colour in common_colours] + prop_values = [colour_proportions.get(colour, 0) for colour in common_colours] + + distances[image_name] = np.linalg.norm(np.array(avg_values) - np.array(prop_values)) + + sorted_distances = sorted(distances.items(), key=lambda item: item[1]) + + return sorted_distances + +# Example usage +# sorted_distances = compare_colour_distributions(averaged_colours, colour_table) + +def extract_top_colours_by_threshold(colour_list, threshold): + """ + Extracts top colours based on a cumulative proportion threshold. + + Parameters: + colour_list (list): A list of tuples, each being a 2-element (colour, proportion) or + a 3-element (colour, proportion, count) tuple. + threshold (float): A float between 0 and 1, representing the threshold for the cumulative proportion. + + Returns: + list: A list of tuples (colour, proportion), sorted by proportion in descending order, + whose cumulative proportion just exceeds the threshold. + """ + # Sort the list by proportion in descending order + sorted_colours = sorted(colour_list, key=lambda x: x[1], reverse=True) + + # Extract top colours based on the cumulative proportion threshold + cumulative_proportion = 0.0 + top_colours = [] + for colour in sorted_colours: + cumulative_proportion += colour[1] + top_colours.append((colour[0], colour[1])) + if cumulative_proportion >= threshold: + break + + return top_colours + + +def find_nearest_colour_family(colour, colour_families): + """ + Determines the nearest colour family for a given colour. + + Parameters: + colour (tuple): The colour in RGB format. + colour_families (dict): A dictionary where keys are family names and values are lists of representative RGB colours for each family. + + Returns: + str: The name of the nearest colour family. + """ + min_distance = float('inf') + nearest_family = None + + for family, representative_colours in colour_families.items(): + for rep_colour in representative_colours: + distance = np.linalg.norm(np.array(colour) - np.array(rep_colour)) + if distance < min_distance: + min_distance = distance + nearest_family = family + + return nearest_family + + +def find_nearest_colour_family(colour, colour_families): + """ + Determines the nearest colour family for a given colour based on the minimum Euclidean distance. + + Parameters: + colour (tuple): The colour in RGB format. + colour_families (dict): A dictionary where keys are family names and values are lists of representative RGB colours for each family. 
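+    For example (illustrative): find_nearest_colour_family((150, 10, 10), COLOUR_FAMILIES) returns 'dark_reds', because [139, 0, 0] is the closest representative colour across all families.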
+ + Returns: + str: The name of the nearest colour family. + """ + min_distance = float('inf') + nearest_family = None + + # Convert colour to numpy array for distance calculation + colour = np.array(colour) + + for family, family_colours in colour_families.items(): + for rep_colour in family_colours: + # Calculate the Euclidean distance + distance = np.linalg.norm(colour - np.array(rep_colour)) + if distance < min_distance: + min_distance = distance + nearest_family = family + + return nearest_family + diff --git a/common/helpers/colour_estimation/src/colour_estimation/rgb.py b/common/helpers/colour_estimation/src/colour_estimation/rgb.py index 9854f110e..40d018fdc 100644 --- a/common/helpers/colour_estimation/src/colour_estimation/rgb.py +++ b/common/helpers/colour_estimation/src/colour_estimation/rgb.py @@ -1,42 +1,163 @@ import numpy as np -RGB_COLOURS = { +COLOURS = { + "red": [255, 0, 0], + "green": [0, 255, 0], + "blue": [0, 0, 255], + "white": [255, 255, 255], + "black": [0, 0, 0], + "yellow": [255, 255, 0], + "cyan": [0, 255, 255], + "magenta": [255, 0, 255], + "gray": [128, 128, 128], + "orange": [255, 165, 0], + "purple": [128, 0, 128], + "brown": [139, 69, 19], + "pink": [255, 182, 193], + "beige": [245, 245, 220], + "maroon": [128, 0, 0], + "olive": [128, 128, 0], + "navy": [0, 0, 128], + "lime": [50, 205, 50], + "golden": [255, 223, 0], + "teal": [0, 128, 128], + "coral": [255, 127, 80], + "salmon": [250, 128, 114], + "turquoise": [64, 224, 208], + "violet": [238, 130, 238], + "platinum": [229, 228, 226], + "ochre": [204, 119, 34], + "burntsienna": [233, 116, 81], + "chocolate": [210, 105, 30], + "tan": [210, 180, 140], + "ivory": [255, 255, 240], + "goldenrod": [218, 165, 32], + "orchid": [218, 112, 214], + "honey": [238, 220, 130] + } + +SPESIFIC_COLOURS = { + "red": [255, 0, 0], + "green": [0, 255, 0], + "blue": [0, 0, 255], + "white": [255, 255, 255], + "black": [0, 0, 0], + "yellow": [255, 255, 0], + "cyan": [0, 255, 255], + "magenta": [255, 0, 255], + "gray": [128, 128, 128], + "orange": [255, 165, 0], + "purple": [128, 0, 128], + "brown": [139, 69, 19], + "pink": [255, 182, 193], + "beige": [245, 245, 220], + "maroon": [128, 0, 0], + "olive": [128, 128, 0], + "navy": [0, 0, 128], + "lime": [50, 205, 50], + "golden": [255, 223, 0], + "teal": [0, 128, 128], + "coral": [255, 127, 80], + "salmon": [250, 128, 114], + "turquoise": [64, 224, 208], + "violet": [238, 130, 238], + "platinum": [229, 228, 226], + "ochre": [204, 119, 34], + "burntsienna": [233, 116, 81], + "chocolate": [210, 105, 30], + "tan": [210, 180, 140], + "ivory": [255, 255, 240], + "goldenrod": [218, 165, 32], + "orchid": [218, 112, 214], + "honey": [238, 220, 130], + "lavender": [230, 230, 250], + "mint": [189, 252, 201], + "peach": [255, 229, 180], + "ruby": [224, 17, 95], + "indigo": [75, 0, 130], + "amber": [255, 191, 0], + "emerald": [80, 200, 120], + "sapphire": [15, 82, 186], + "aquamarine": [127, 255, 212], + "periwinkle": [204, 204, 255], + "fuchsia": [255, 0, 255], + "raspberry": [227, 11, 92], + "slate": [112, 128, 144], + "charcoal": [54, 69, 79] + } + +DETAILED_COLOURS = { + "light_red": [255, 204, 204], + "bright_red": [255, 0, 0], + "dark_red": [139, 0, 0], + "light_green": [204, 255, 204], + "bright_green": [0, 255, 0], + "dark_green": [0, 100, 0], + "light_blue": [204, 204, 255], + "bright_blue": [0, 0, 255], + "dark_blue": [0, 0, 139], + "light_yellow": [255, 255, 204], + "bright_yellow": [255, 255, 0], + "dark_yellow": [204, 204, 0], + "light_cyan": [204, 255, 255], + "bright_cyan": [0, 
255, 255], + "dark_cyan": [0, 139, 139], + "light_magenta": [255, 204, 255], + "bright_magenta": [255, 0, 255], + "dark_magenta": [139, 0, 139], + "light_orange": [255, 229, 204], + "bright_orange": [255, 165, 0], + "dark_orange": [255, 140, 0], + "light_purple": [229, 204, 255], + "bright_purple": [128, 0, 128], + "dark_purple": [102, 0, 102], + "light_pink": [255, 204, 229], + "bright_pink": [255, 105, 180], + "dark_pink": [255, 20, 147], + "light_brown": [210, 180, 140], + "medium_brown": [165, 42, 42], + "dark_brown": [101, 67, 33], + # ... +} + +COLOUR_FAMILIES = { + "light_reds": [[255, 182, 193], [255, 192, 203], [255, 160, 122]], + "dark_reds": [[139, 0, 0], [178, 34, 34], [165, 42, 42]], + "light_blues": [[173, 216, 230], [135, 206, 250], [176, 224, 230]], + "dark_blues": [[0, 0, 139], [25, 25, 112], [0, 0, 128]], + "bluish_greens": [[102, 205, 170], [32, 178, 170], [72, 209, 204]], + "light_greens": [[144, 238, 144], [152, 251, 152], [143, 188, 143]], + "dark_greens": [[0, 100, 0], [34, 139, 34], [47, 79, 79]], + "yellows": [[255, 255, 0], [255, 255, 102], [255, 215, 0]], + "oranges": [[255, 165, 0], [255, 140, 0], [255, 69, 0]], + "purples": [[128, 0, 128], [147, 112, 219], [138, 43, 226]], + "pinks": [[255, 192, 203], [255, 182, 193], [255, 105, 180]], + "browns": [[165, 42, 42], [139, 69, 19], [160, 82, 45]], + "cyans": [[0, 255, 255], [0, 139, 139], [72, 209, 204]], + "greys": [[128, 128, 128], [169, 169, 169], [192, 192, 192]], + # ... +} + +SIMPLIFIED_COLOURS = { "red": [255, 0, 0], "green": [0, 255, 0], "blue": [0, 0, 255], "white": [255, 255, 255], "black": [0, 0, 0], "yellow": [255, 255, 0], - "cyan": [0, 255, 255], - "magenta": [255, 0, 255], "gray": [128, 128, 128], "orange": [255, 165, 0], "purple": [128, 0, 128], - "brown": [139, 69, 19], "pink": [255, 182, 193], + "light blue": [173, 216, 230], + "dark green": [0, 100, 0], + "light gray": [211, 211, 211], + "dark red": [139, 0, 0], "beige": [245, 245, 220], - "maroon": [128, 0, 0], - "olive": [128, 128, 0], - "navy": [0, 0, 128], - "lime": [50, 205, 50], - "golden": [255, 223, 0], - "teal": [0, 128, 128], - "coral": [255, 127, 80], - "salmon": [250, 128, 114], - "turquoise": [64, 224, 208], - "violet": [238, 130, 238], - "platinum": [229, 228, 226], - "ochre": [204, 119, 34], - "burntsienna": [233, 116, 81], - "chocolate": [210, 105, 30], - "tan": [210, 180, 140], - "ivory": [255, 255, 240], - "goldenrod": [218, 165, 32], - "orchid": [218, 112, 214], - "honey": [238, 220, 130] + "navy": [0, 0, 128] } -RGB_HAIR_COLOURS = { +HAIR_COLOURS = { 'midnight black': (9, 8, 6), 'off black': (44, 34, 43), 'strong dark brown': (58, 48, 36), @@ -65,4 +186,5 @@ 'white blonde': (255, 24, 225), 'platinum blonde': (202, 191, 177), 'russet red': (145, 74, 67), - 'terra cotta': (181, 82, 57)} + 'terra cotta': (181, 82, 57) + } \ No newline at end of file diff --git a/common/helpers/torch_module/src/torch_module/modules/__init__.py b/common/helpers/torch_module/src/torch_module/modules/__init__.py index b0eac41aa..26331726f 100644 --- a/common/helpers/torch_module/src/torch_module/modules/__init__.py +++ b/common/helpers/torch_module/src/torch_module/modules/__init__.py @@ -129,3 +129,170 @@ def forward(self, x): if self.sigmoid: x = torch.sigmoid(x) return x + + +def x2conv(in_channels, out_channels, inner_channels=None): + inner_channels = out_channels // 2 if inner_channels is None else inner_channels + down_conv = nn.Sequential( + nn.Conv2d(in_channels, inner_channels, kernel_size=3, padding=1, bias=False), + 
nn.BatchNorm2d(inner_channels), + nn.ReLU(inplace=True), + nn.Conv2d(inner_channels, out_channels, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True)) + return down_conv + + +class Encoder(nn.Module): + def __init__(self, in_channels, out_channels): + super(Encoder, self).__init__() + self.down_conv = x2conv(in_channels, out_channels) + self.pool = nn.MaxPool2d(kernel_size=2, ceil_mode=True) + + def forward(self, x): + x = self.down_conv(x) + x = self.pool(x) + return x + + +class Decoder(nn.Module): + def __init__(self, in_channels, out_channels): + super(Decoder, self).__init__() + self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2) + self.up_conv = x2conv(in_channels, out_channels) + + def forward(self, x_copy, x, interpolate=True): + x = self.up(x) + + if (x.size(2) != x_copy.size(2)) or (x.size(3) != x_copy.size(3)): + if interpolate: + # Interpolating instead of padding + x = F.interpolate(x, size=(x_copy.size(2), x_copy.size(3)), + mode="bilinear", align_corners=True) + else: + # Padding in case the incoming volumes are of different sizes + diffY = x_copy.size()[2] - x.size()[2] + diffX = x_copy.size()[3] - x.size()[3] + x = F.pad(x, (diffX // 2, diffX - diffX // 2, + diffY // 2, diffY - diffY // 2)) + + # Concatenate + x = torch.cat([x_copy, x], dim=1) + x = self.up_conv(x) + return x + + +class UNetWithResnet18Encoder(nn.Module): + class Decoder(nn.Module): + def __init__(self, in_channels, skip_channels, out_channels): + super(UNetWithResnet18Encoder.Decoder, self).__init__() + self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2) + self.up_conv = x2conv(out_channels + skip_channels, out_channels) + + def forward(self, x_copy, x): + x = self.up(x) + if x.size(2) != x_copy.size(2) or x.size(3) != x_copy.size(3): + x = F.interpolate(x, size=(x_copy.size(2), x_copy.size(3)), mode='bilinear', align_corners=True) + x = torch.cat((x_copy, x), dim=1) + x = self.up_conv(x) + return x + + def __init__(self, num_classes, in_channels=3, freeze_bn=False, sigmoid=True): + super(UNetWithResnet18Encoder, self).__init__() + self.sigmoid = sigmoid + resnet18 = models.resnet18(pretrained=True) + + if in_channels != 3: + resnet18.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) + + self.encoder1 = nn.Sequential(resnet18.conv1, resnet18.bn1, resnet18.relu) + self.encoder2 = resnet18.layer1 + self.encoder3 = resnet18.layer2 + self.encoder4 = resnet18.layer3 + self.encoder5 = resnet18.layer4 + + self.up1 = UNetWithResnet18Encoder.Decoder(512, 256, 256) + self.up2 = UNetWithResnet18Encoder.Decoder(256, 128, 128) + self.up3 = UNetWithResnet18Encoder.Decoder(128, 64, 64) + self.up4 = UNetWithResnet18Encoder.Decoder(64, 64, 64) + + self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1) + self._initialize_weights() + + if freeze_bn: + self.freeze_bn() + + def _initialize_weights(self): + for module in self.modules(): + if isinstance(module, nn.Conv2d) or isinstance(module, nn.ConvTranspose2d): + nn.init.kaiming_normal_(module.weight) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.BatchNorm2d): + module.weight.data.fill_(1) + module.bias.data.zero_() + + def forward(self, x): + x1 = self.encoder1(x) + x2 = self.encoder2(x1) + x3 = self.encoder3(x2) + x4 = self.encoder4(x3) + x5 = self.encoder5(x4) + + x = self.up1(x4, x5) + x = self.up2(x3, x) + x = self.up3(x2, x) + x = self.up4(x1, x) + x = F.interpolate(x, size=(x.size(2)*2,
x.size(3)*2), mode='bilinear', align_corners=True) + + x = self.final_conv(x) + + if self.sigmoid: + x = torch.sigmoid(x) + return x + + def freeze_bn(self): + for module in self.modules(): + if isinstance(module, nn.BatchNorm2d): + module.eval() + + +class MultiLabelResNet(nn.Module): + def __init__(self, num_labels, input_channels=3, sigmoid=True, pretrained=True,): + super(MultiLabelResNet, self).__init__() + self.model = models.resnet18(pretrained=pretrained) + self.sigmoid = sigmoid + + if input_channels != 3: + self.model.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) + + num_ftrs = self.model.fc.in_features + + self.model.fc = nn.Linear(num_ftrs, num_labels) + + def forward(self, x): + x = self.model(x) + if self.sigmoid: + x = torch.sigmoid(x) + return x + + +class CombinedModel(nn.Module): + def __init__(self, segment_model: nn.Module, predict_model: nn.Module, cat_layers:int=None): + super(CombinedModel, self).__init__() + self.segment_model = segment_model + self.predict_model = predict_model + self.cat_layers = cat_layers + + def forward(self, x: torch.Tensor): + seg_masks = self.segment_model(x) + + if self.cat_layers: + seg_masks_ = seg_masks[:, 0:self.cat_layers] + x = torch.cat((x, seg_masks_), dim=1) + else: + x = torch.cat((x, seg_masks), dim=1) + + logic_outputs = self.predict_model(x) + return seg_masks, logic_outputs + diff --git a/common/vision/lasr_vision_msgs/msg/FeatureWithColour.msg b/common/vision/lasr_vision_msgs/msg/FeatureWithColour.msg index fe9ca3d71..21702415b 100644 --- a/common/vision/lasr_vision_msgs/msg/FeatureWithColour.msg +++ b/common/vision/lasr_vision_msgs/msg/FeatureWithColour.msg @@ -1,5 +1,8 @@ # Feature name string name +# Feature label +bool label + # Colour predictions -lasr_vision_msgs/ColourPrediction[] colours +string[] colours diff --git a/common/vision/lasr_vision_msgs/srv/TorchFaceFeatureDetection.srv b/common/vision/lasr_vision_msgs/srv/TorchFaceFeatureDetection.srv index e958bcd57..fe7aa0812 100644 --- a/common/vision/lasr_vision_msgs/srv/TorchFaceFeatureDetection.srv +++ b/common/vision/lasr_vision_msgs/srv/TorchFaceFeatureDetection.srv @@ -11,4 +11,5 @@ string torso_mask_dtype --- # Detection result -lasr_vision_msgs/FeatureWithColour[] detected_features \ No newline at end of file +lasr_vision_msgs/FeatureWithColour[] detected_features +# string detected_features diff --git a/common/vision/lasr_vision_torch/nodes/service b/common/vision/lasr_vision_torch/nodes/service index ec5081ba7..fb51c5265 100644 --- a/common/vision/lasr_vision_torch/nodes/service +++ b/common/vision/lasr_vision_torch/nodes/service @@ -1,74 +1,43 @@ from lasr_vision_msgs.srv import TorchFaceFeatureDetection, TorchFaceFeatureDetectionRequest, TorchFaceFeatureDetectionResponse from lasr_vision_msgs.msg import FeatureWithColour, ColourPrediction -from colour_estimation import closest_colours, RGB_COLOURS, RGB_HAIR_COLOURS from cv2_img import msg_to_cv2_img from torch_module.helpers import binary_erosion_dilation, median_color_float from numpy2message import message2numpy import numpy as np +import cv2 import torch import rospy +import rospkg import lasr_vision_torch - - -model = lasr_vision_torch.load_face_classifier_model() +from os import path def detect(request: TorchFaceFeatureDetectionRequest) -> TorchFaceFeatureDetectionResponse: # decode the image rospy.loginfo('Decoding') - frame = msg_to_cv2_img(request.image_raw) + full_frame = msg_to_cv2_img(request.image_raw) torso_mask_data, torso_mask_shape, torso_mask_dtype = 
request.torso_mask_data, request.torso_mask_shape, request.torso_mask_dtype head_mask_data, head_mask_shape, head_mask_dtype = request.head_mask_data, request.head_mask_shape, request.head_mask_dtype - torsal_mask = message2numpy(torso_mask_data, torso_mask_shape, torso_mask_dtype) + torso_mask = message2numpy(torso_mask_data, torso_mask_shape, torso_mask_dtype) head_mask = message2numpy(head_mask_data, head_mask_shape, head_mask_dtype) - # print(torso_mask_shape) - # print(head_mask_shape) - - # 'hair', 'hat', 'glasses', 'face' - input_image = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float() - input_image /= 255.0 - masks_batch_pred, pred_classes = model(input_image) - - thresholds_mask = [ - 0.5, 0.75, 0.25, 0.5, # 0.5, 0.5, 0.5, 0.5, - ] - thresholds_pred = [ - 0.6, 0.8, 0.1, 0.5, - ] - erosion_iterations = 1 - dilation_iterations = 1 - categories = ['hair', 'hat', 'glasses', 'face',] - - masks_batch_pred = binary_erosion_dilation( - masks_batch_pred, thresholds=thresholds_mask, - erosion_iterations=erosion_iterations, dilation_iterations=dilation_iterations - ) - - median_colours = (median_color_float( - input_image, masks_batch_pred).detach().squeeze(0)*255).numpy().astype(np.uint8) - - # discarded: masks = masks_batch_pred.detach().squeeze(0).numpy().astype(np.uint8) - # discarded: mask_list = [masks[i,:,:] for i in range(masks.shape[0])] - - pred_classes = pred_classes.detach().squeeze(0).numpy() - # discarded: class_list = [categories[i] for i in range( - # pred_classes.shape[0]) if pred_classes[i].item() > thresholds_pred[i]] - colour_list = [median_colours[i, :] - for i in range(median_colours.shape[0])] + head_frame = lasr_vision_torch.extract_mask_region(full_frame, head_mask.astype(np.uint8), expand_x=0.4, expand_y=0.5) + torso_frame = lasr_vision_torch.extract_mask_region(full_frame, torso_mask.astype(np.uint8), expand_x=0.2, expand_y=0.0) + class_pred, colour_pred = lasr_vision_torch.predict_frame(head_frame, torso_frame, full_frame, head_mask, torso_mask, lasr_vision_torch.model, lasr_vision_torch.thresholds_mask, lasr_vision_torch.erosion_iterations, lasr_vision_torch.dilation_iterations, lasr_vision_torch.thresholds_pred) + response = TorchFaceFeatureDetectionResponse() - response.detected_features = [ - FeatureWithColour(categories[i], [ - ColourPrediction(colour, distance) - for colour, distance - in closest_colours(colour_list[i], RGB_HAIR_COLOURS if categories[i] == 'hair' else RGB_COLOURS) - ]) - for i - in range(pred_classes.shape[0]) - if pred_classes[i].item() > thresholds_pred[i] - ] - + # response.detected_features = str(class_pred) + str(colour_pred) + response.detected_features = [] + for c in ['hair', 'hat', 'glasses', 'cloth',]: + colour_pred[c] = {k: v[0] for k, v in colour_pred[c].items()} + sorted_list = sorted(colour_pred[c].items(), key=lambda item: item[1], reverse=True) + rospy.loginfo(str(sorted_list)) + if len(sorted_list) > 3: + sorted_list = sorted_list[0:3] + sorted_list = [k for k, v in sorted_list] + # rospy.loginfo(str(colour_pred[c])) + response.detected_features.append(FeatureWithColour(c, class_pred[c], sorted_list)) return response @@ -80,7 +49,7 @@ def detect(request: TorchFaceFeatureDetectionRequest) -> TorchFaceFeatureDetecti # # 'hair', 'hat', 'glasses', 'face' # input_image = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float() # input_image /= 255.0 -# masks_batch_pred, pred_classes = model(input_image) +# masks_batch_pred, pred_classes = lasr_vision_torch.model(input_image) # thresholds_mask = [ # 0.5, 0.75, 
0.25, 0.5, # 0.5, 0.5, 0.5, 0.5, @@ -110,16 +79,17 @@ def detect(request: TorchFaceFeatureDetectionRequest) -> TorchFaceFeatureDetecti # for i in range(median_colours.shape[0])] # response = TorchFaceFeatureDetectionResponse() -# response.detected_features = [ -# FeatureWithColour(categories[i], [ -# ColourPrediction(colour, distance) -# for colour, distance -# in closest_colours(colour_list[i], RGB_HAIR_COLOURS if categories[i] == 'hair' else RGB_COLOURS) -# ]) -# for i -# in range(pred_classes.shape[0]) -# if pred_classes[i].item() > thresholds_pred[i] -# ] +# # response.detected_features = [ +# # FeatureWithColour(categories[i], [ +# # ColourPrediction(colour, distance) +# # for colour, distance +# # in closest_colours(colour_list[i], HAIR_COLOURS if categories[i] == 'hair' else COLOURS) +# # ]) +# # for i +# # in range(pred_classes.shape[0]) +# # if pred_classes[i].item() > thresholds_pred[i] +# # ] +# response.detected_features = "feature" # return response # test test diff --git a/common/vision/lasr_vision_torch/src/lasr_vision_torch/__init__.py b/common/vision/lasr_vision_torch/src/lasr_vision_torch/__init__.py index fa62dbc3f..b1d380b4a 100644 --- a/common/vision/lasr_vision_torch/src/lasr_vision_torch/__init__.py +++ b/common/vision/lasr_vision_torch/src/lasr_vision_torch/__init__.py @@ -1,21 +1,317 @@ -from torch_module.modules import DeepLabV3PlusMobileNetV3, MultiLabelMobileNetV3Large, CombinedModelNoRegression -from torch_module.helpers import load_torch_model +from torch_module.modules import UNetWithResnet18Encoder, MultiLabelResNet, CombinedModel # DeepLabV3PlusMobileNetV3, MultiLabelMobileNetV3Large, CombinedModelNoRegression +from torch_module.helpers import load_torch_model, binary_erosion_dilation +from colour_estimation import closest_colours, load_images_to_dict, generate_colour_table, average_colours_by_label, count_colours_in_masked_area +from colour_estimation import COLOURS, HAIR_COLOURS, SPESIFIC_COLOURS, DETAILED_COLOURS + +import numpy as np +import cv2 +import torch +import rospy import rospkg +import lasr_vision_torch from os import path def load_face_classifier_model(): cat_layers = 4 - # 'cloth', 'hair', 'hat', 'glasses', 'face', - segment_model = DeepLabV3PlusMobileNetV3(num_classes=4) - # 'hair', 'hat', 'glasses', 'face', ; first three with colours, rgb - predict_model = MultiLabelMobileNetV3Large(cat_layers, 7) - model = CombinedModelNoRegression( - segment_model, predict_model, cat_layers=cat_layers) + segment_model = UNetWithResnet18Encoder(num_classes=4) + predict_model = MultiLabelResNet(num_labels=4, input_channels=7) + model = CombinedModel(segment_model, predict_model, cat_layers=cat_layers) model.eval() r = rospkg.RosPack() model, _, _, _ = load_torch_model(model, None, path=path.join(r.get_path( - "lasr_vision_torch"), "models", "best_model_epoch_31.pth"), cpu_only=True) + "lasr_vision_torch"), "models", "model.pth"), cpu_only=True) return model + + +model = load_face_classifier_model() +# setups +face_th_rate = 0.05 +thresholds_mask = [ + 0.5, 0.75, 0.25, 0.5, # 0.5, 0.5, 0.5, 0.5, +] +thresholds_pred = [ + 0.6, 0.8, 0.1, 0.5, +] +erosion_iterations = 1 +dilation_iterations = 1 +colour_distance_rate = 1.2 +categories = ['hair', 'hat', 'glasses', 'face',] +cat_layers = 4 + +# prepare hair colour table +r = rospkg.RosPack() +image_dict = load_images_to_dict(path.join(r.get_path( + "colour_estimation"), "hair_colours")) +hair_colour_table = generate_colour_table(image_dict, SPESIFIC_COLOURS) + + +def pad_image_to_even_dims(image): + # Get the 
current shape of the image + height, width, _ = image.shape + + # Calculate the padding needed for height and width + height_pad = 0 if height % 2 == 0 else 1 + width_pad = 0 if width % 2 == 0 else 1 + + # Pad the image. Pad the bottom and right side of the image + padded_image = np.pad(image, ((0, height_pad), (0, width_pad), (0, 0)), mode='constant', constant_values=0) + + return padded_image + + +def extract_mask_region(frame, mask, expand_x=0.5, expand_y=0.5): + """ + Extracts the face region from the image and expands the region by the specified amount. + + :param frame: The source image. + :param mask: The mask with the face part. + :param expand_x: The percentage to expand the width of the bounding box. + :param expand_y: The percentage to expand the height of the bounding box. + :return: The extracted face region as a numpy array, or None if not found. + """ + contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if contours: + largest_contour = max(contours, key=cv2.contourArea) + x, y, w, h = cv2.boundingRect(largest_contour) + + # Expand the bounding box + new_w = w * (1 + expand_x) + new_h = h * (1 + expand_y) + x -= (new_w - w) // 2 + y -= (new_h - h) // 2 + + # Ensure the new bounding box is within the frame dimensions + x = int(max(0, x)) + y = int(max(0, y)) + new_w = min(frame.shape[1] - x, new_w) + new_h = min(frame.shape[0] - y, new_h) + + face_region = frame[y:y+int(new_h), x:x+int(new_w)] + return face_region + return None + + +def process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred): + """ + Processes the head frame to extract class counts and color information for head-related classes. + + Args: + - head_frame (np.ndarray): The head frame extracted by the BodyPix model. + - model: A PyTorch model instance for classifying and predicting masks for head features. + - thresholds_mask, erosion_iterations, dilation_iterations: Thresholds and iteration counts for binary erosion and dilation. + - thresholds_pred: A list of prediction thresholds. + + Returns: + - Tuple[dict, dict]: A tuple containing two dictionaries: + - head_class_count: A dictionary with counts for each head-related class. + - head_class_colours: A dictionary with color information for each head-related class. 
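+    Example (illustrative): a head crop showing only dark hair might yield head_class_count == {'hair': 1, 'hat': 0, 'glasses': 0} and head_class_colours == {'hair': {'black': [0.7], 'brown': [0.3]}, 'hat': {}, 'glasses': {}}.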
+ """ + head_class_count = { + 'hair': 0, + 'hat': 0, + 'glasses': 0, + } + head_class_colours = { + 'hair': {}, + 'hat': {}, + 'glasses': {}, + } + + if head_frame is not None: + # try: + # _head_frame_bgr = cv2.cvtColor(head_frame, cv2.COLOR_RGB2BGR) + # cv2.imshow('Head Frame', _head_frame_bgr) + # except Exception as ignore: + # pass + + # Convert head frame to PyTorch tensor and normalize + head_frame_tensor = torch.from_numpy(head_frame).permute(2, 0, 1).unsqueeze(0).float() / 255.0 + masks_batch_pred, pred_classes = model(head_frame_tensor) + + # Apply binary erosion and dilation to the masks + processed_masks = binary_erosion_dilation( + masks_batch_pred, thresholds=thresholds_mask, + erosion_iterations=erosion_iterations, dilation_iterations=dilation_iterations + ) + masks = processed_masks.detach().squeeze(0).numpy().astype(np.uint8) + mask_list = [masks[i,:,:] for i in range(masks.shape[0])] + pred_classes = pred_classes.detach().squeeze(0).numpy() + + # Determine if each class is present + class_list = [pred_classes[i].item() > thresholds_pred[i] for i in range(pred_classes.shape[0])] + + # Update class count + for each_class, k in zip(class_list[0:3], ['hair', 'hat', 'glasses']): + head_class_count[k] = int(each_class) + + # Update class colours + for f, each_mask, k, c_map in zip([head_frame, head_frame, head_frame], mask_list[0:2], ['hair', 'hat', 'glasses'], [SPESIFIC_COLOURS, DETAILED_COLOURS, DETAILED_COLOURS]): + colours = count_colours_in_masked_area(f, each_mask, c_map, sort=True)[1] + for colour in colours: + if colour[0] not in head_class_colours[k]: + head_class_colours[k][colour[0]] = [colour[1]] + else: + head_class_colours[k][colour[0]].append(colour[1]) + + return head_class_count, head_class_colours + + +def process_cloth(full_frame, torso_mask): + """ + Processes the full frame with the torso mask to extract class counts and color information for cloth. + + Args: + - full_frame (np.ndarray): The full original frame from the video source. + - torso_mask (np.ndarray): The torso mask extracted by the BodyPix model. + + Returns: + - Tuple[dict, dict]: A tuple containing two dictionaries: + - cloth_class_count: A dictionary with counts for the cloth class. + - cloth_class_colours: A dictionary with color information for the cloth class. + """ + cloth_class_count = { + 'cloth': 0, + } + cloth_class_colours = { + 'cloth': {}, + } + + # Check if cloth is detected + if torso_mask is not None and np.sum(torso_mask) >= 50: + cloth_class_count['cloth'] = 1 + + # Update cloth colours + colours = count_colours_in_masked_area(full_frame, torso_mask, DETAILED_COLOURS, sort=True)[1] + for colour in colours: + if colour[0] not in cloth_class_colours['cloth']: + cloth_class_colours['cloth'][colour[0]] = [colour[1]] + else: + cloth_class_colours['cloth'][colour[0]].append(colour[1]) + + return cloth_class_count, cloth_class_colours + + +# you can use this function directly for prediction. +def predict_frame(head_frame, torso_frame, full_frame, head_mask, torso_mask, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred): + """ + Predicts classes and color information for a single processed video frame. + + Args: + - head_frame (np.ndarray): The head frame extracted by the BodyPix model. + - full_frame (np.ndarray): The full original frame from the video source. + - head_mask (np.ndarray): The head mask extracted by the BodyPix model. + - torso_mask (np.ndarray): The torso mask extracted by the BodyPix model. 
+ - model: A PyTorch model instance for classifying and predicting masks for head features. + - thresholds_mask, erosion_iterations, dilation_iterations: Thresholds and iteration counts for binary erosion and dilation. + - thresholds_pred: A list of prediction thresholds. + + Returns: + - Tuple[dict, dict]: A tuple containing: + - class_pred: A dictionary with predicted classes for the single frame. + - colour_pred: A dictionary with predicted colors for the single frame. + """ + class_count = { + 'hair': 0, + 'hat': 0, + 'glasses': 0, + 'cloth': 0, + } + class_colours = { + 'hair': {}, + 'hat': {}, + 'glasses': {}, + 'cloth': {}, + } + + head_frame = pad_image_to_even_dims(head_frame) + torso_frame = pad_image_to_even_dims(torso_frame) + + # Process head and cloth separately for the single frame + head_class_count, head_class_colours = process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred) + cloth_class_count, cloth_class_colours = process_cloth(full_frame, torso_mask) + + # Update class counts and colours + for k in head_class_count: + class_count[k] = head_class_count[k] + class_colours[k] = head_class_colours[k] + + class_count['cloth'] = cloth_class_count['cloth'] + class_colours['cloth'] = cloth_class_colours['cloth'] + + # Compute final class predictions and colors for the single frame + class_pred = {k: bool(class_count[k]) for k in class_count} + colour_pred = {k: v for k, v in class_colours.items()} + + # class_pred, colour_pred = None, None + + return class_pred, colour_pred + + +# # if able to provide multiple frames (see __main__ seciton), then this should work better than the single frame version. +# def predict_frames(head_frames, torso_frames, full_frames, head_masks, torso_masks, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred, SPESIFIC_COLOURS): +# """ +# Predicts classes and color information for a sequence of processed video frames. + +# Args: +# - head_frames (list[np.ndarray]): List of head frames extracted by the BodyPix model. +# - torso_frames (list[np.ndarray]): List of body frames extracted by the BodyPix model. +# - full_frames (list[np.ndarray]): List of full original frames from the video source. +# - head_masks (list[np.ndarray]): List of head masks extracted by the BodyPix model. +# - torso_masks (list[np.ndarray]): List of torso masks extracted by the BodyPix model. +# - model: A PyTorch model instance for classifying and predicting masks for head features. +# - thresholds_mask, erosion_iterations, dilation_iterations: Thresholds and iteration counts for binary erosion and dilation. +# - thresholds_pred: A list of prediction thresholds. +# - SPESIFIC_COLOURS: A dictionary of specific colors. + +# Returns: +# - Tuple[dict, dict]: A tuple containing: +# - class_pred: A dictionary with predicted classes. +# - colour_pred: A dictionary with predicted colors. 
+# """ +# total_class_count = { +# 'hair': [], +# 'hat': [], +# 'glasses': [], +# 'cloth': [], +# } +# total_class_colours = { +# 'hair': {}, +# 'hat': {}, +# 'glasses': {}, +# 'cloth': {}, +# } + +# for head_frame, torso_frame, full_frame, head_mask, torso_mask in zip(head_frames, torso_frames, full_frames, head_masks, torso_masks): +# head_frame = pad_image_to_even_dims(head_frame) +# torso_frame = pad_image_to_even_dims(torso_frame) + +# # Process head and cloth separately +# head_class_count, head_class_colours = process_head(head_frame, model, thresholds_mask, erosion_iterations, dilation_iterations, thresholds_pred) +# cloth_class_count, cloth_class_colours = process_cloth(full_frame, torso_mask) + +# # Accumulate class counts and colours +# for k in head_class_count: +# total_class_count[k].append(head_class_count[k]) +# if k in head_class_colours: +# for colour, count in head_class_colours[k].items(): +# if colour not in total_class_colours[k]: +# total_class_colours[k][colour] = count +# else: +# total_class_colours[k][colour].extend(count) + +# total_class_count['cloth'].append(cloth_class_count['cloth']) +# for colour, count in cloth_class_colours['cloth'].items(): +# if colour not in total_class_colours['cloth']: +# total_class_colours['cloth'][colour] = count +# else: +# total_class_colours['cloth'][colour].extend(count) + +# # Compute final class predictions and colors +# class_pred = {k: sum(v) >= len(v) / 2 for k, v in total_class_count.items()} +# colour_pred = average_colours_by_label(total_class_count, total_class_colours) + +# return class_pred, colour_pred + diff --git a/skills/scripts/unit_test_describe_people.py b/skills/scripts/unit_test_describe_people.py index 3cf73d34c..3445980b9 100644 --- a/skills/scripts/unit_test_describe_people.py +++ b/skills/scripts/unit_test_describe_people.py @@ -14,6 +14,7 @@ sm.add('DESCRIBE', DescribePeople(), transitions={ 'succeeded': 'end', 'failed': 'end'}) + # while True: sm.execute() print('\n\nDetected people:', sm.userdata['people']) diff --git a/skills/src/lasr_skills/describe_people.py b/skills/src/lasr_skills/describe_people.py index d64b07cee..29a37c1ae 100644 --- a/skills/src/lasr_skills/describe_people.py +++ b/skills/src/lasr_skills/describe_people.py @@ -6,7 +6,7 @@ import cv2_img import numpy as np -from colour_estimation import closest_colours, RGB_COLOURS +# from colour_estimation import closest_colours, RGB_COLOURS from lasr_vision_msgs.msg import BodyPixMaskRequest, ColourPrediction, FeatureWithColour from lasr_vision_msgs.srv import YoloDetection, BodyPixDetection, TorchFaceFeatureDetection from numpy2message import numpy2message @@ -171,57 +171,57 @@ def execute(self, userdata): head_mask_data, head_mask_shape, head_mask_dtype, ).detected_features) - # process part masks - for (bodypix_mask, part) in zip(userdata.bodypix_masks, ['torso', 'head']): - part_mask = np.array(bodypix_mask.mask).reshape( - bodypix_mask.shape[0], bodypix_mask.shape[1]) - - # filter out part for current person segmentation - try: - part_mask[mask_bin == 0] = 0 - except Exception: - rospy.logdebug('|> Failed to check {part} is visible') - continue - - if part_mask.any(): - rospy.logdebug(f'|> Person has {part} visible') - else: - rospy.logdebug( - f'|> Person does not have {part} visible') - continue - - # do colour processing on the torso - if part == 'torso': - try: - features.append(FeatureWithColour("torso", [ - ColourPrediction(colour, distance) - for colour, distance - in closest_colours(np.median(img[part_mask == 1], axis=0), 
RGB_COLOURS) - ])) - except Exception as e: - rospy.logerr(f"Failed to process colour: {e}") - - # do feature extraction on the head - if part == 'head': - try: - # crop out face - face_mask = np.array(userdata.bodypix_masks[1].mask).reshape( - userdata.bodypix_masks[1].shape[0], userdata.bodypix_masks[1].shape[1]) - - mask_image_only_face = mask_image.copy() - mask_image_only_face[face_mask == 0] = 0 - - face_region = cv2_img.extract_mask_region( - img, mask_image_only_face) - if face_region is None: - raise Exception( - "Failed to extract mask region") - - msg = cv2_img.cv2_img_to_msg(face_region) - features.extend(self.torch_face_features( - msg, False).detected_features) - except Exception as e: - rospy.logerr(f"Failed to process extraction: {e}") + # # process part masks + # for (bodypix_mask, part) in zip(userdata.bodypix_masks, ['torso', 'head']): + # part_mask = np.array(bodypix_mask.mask).reshape( + # bodypix_mask.shape[0], bodypix_mask.shape[1]) + + # # filter out part for current person segmentation + # try: + # part_mask[mask_bin == 0] = 0 + # except Exception: + # rospy.logdebug('|> Failed to check {part} is visible') + # continue + + # if part_mask.any(): + # rospy.logdebug(f'|> Person has {part} visible') + # else: + # rospy.logdebug( + # f'|> Person does not have {part} visible') + # continue + + # # do colour processing on the torso + # if part == 'torso': + # try: + # features.append(FeatureWithColour("torso", [ + # ColourPrediction(colour, distance) + # for colour, distance + # in closest_colours(np.median(img[part_mask == 1], axis=0), RGB_COLOURS) + # ])) + # except Exception as e: + # rospy.logerr(f"Failed to process colour: {e}") + + # # do feature extraction on the head + # if part == 'head': + # try: + # # crop out face + # face_mask = np.array(userdata.bodypix_masks[1].mask).reshape( + # userdata.bodypix_masks[1].shape[0], userdata.bodypix_masks[1].shape[1]) + + # mask_image_only_face = mask_image.copy() + # mask_image_only_face[face_mask == 0] = 0 + + # face_region = cv2_img.extract_mask_region( + # img, mask_image_only_face) + # if face_region is None: + # raise Exception( + # "Failed to extract mask region") + + # msg = cv2_img.cv2_img_to_msg(face_region) + # features.extend(self.torch_face_features( + # msg, False).detected_features) + # except Exception as e: + # rospy.logerr(f"Failed to process extraction: {e}") people.append({ 'detection': person,