Added cloth detection and classification.

LASR-at-Home · May 17, 2024 · 7f3a76f · 7f3a76f
1 parent aea2739
commit 7f3a76f
Show file tree

Hide file tree

Showing 4 changed files with 134 additions and 9 deletions.
diff --git a/common/vision/lasr_vision_feature_extraction/nodes/service b/common/vision/lasr_vision_feature_extraction/nodes/service
@@ -22,16 +22,20 @@ def detect(request: TorchFaceFeatureDetectionDescriptionRequest) -> TorchFaceFea
     head_mask = message2numpy(head_mask_data, head_mask_shape, head_mask_dtype)
     head_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, head_mask.astype(np.uint8), expand_x=0.4, expand_y=0.5)
     torso_frame = lasr_vision_feature_extraction.extract_mask_region(full_frame, torso_mask.astype(np.uint8), expand_x=0.2, expand_y=0.0)
-    rst_str = lasr_vision_feature_extraction.predict_frame(head_frame, torso_frame, full_frame, head_mask, torso_mask, predictor=predictor)
+    rst_str = lasr_vision_feature_extraction.predict_frame(
+        head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor=head_predictor, cloth_predictor=cloth_predictor,
+    )
     response = TorchFaceFeatureDetectionDescriptionResponse()
     response.description = rst_str
     return response
 
 
 if __name__ == '__main__':
     # predictor will be global when inited, thus will be used within the function above.
-    model = lasr_vision_feature_extraction.load_face_classifier_model()
-    predictor = lasr_vision_feature_extraction.Predictor(model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)
+    # Two module-level predictors are created here and read by detect(): one for the head crop, one for the torso crop.
+    head_model = lasr_vision_feature_extraction.load_face_classifier_model()
+    head_predictor = lasr_vision_feature_extraction.Predictor(head_model, torch.device('cpu'), CelebAMaskHQCategoriesAndAttributes)
+    cloth_model = lasr_vision_feature_extraction.load_cloth_classidifer_model()
+    # The cloth model is sized from the DeepFashion2 attribute list and its results are read back by
+    # DeepFashion2 label name, so the cloth predictor is given the DeepFashion2 categories (re-exported
+    # by the package __init__) rather than the CelebA face categories.
+    cloth_predictor = lasr_vision_feature_extraction.Predictor(
+        cloth_model, torch.device('cpu'), lasr_vision_feature_extraction.DeepFashion2GeneralizedCategoriesAndAttributes,
+    )
     rospy.init_node('torch_service')
     rospy.Service('/torch/detect/face_features', TorchFaceFeatureDetectionDescription, detect)
     rospy.loginfo('Torch service started')

diff --git a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/__init__.py b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/__init__.py
@@ -1,10 +1,12 @@
 from lasr_vision_feature_extraction.categories_and_attributes import (
     CategoriesAndAttributes,
     CelebAMaskHQCategoriesAndAttributes,
+    DeepFashion2GeneralizedCategoriesAndAttributes,
 )
 from lasr_vision_feature_extraction.image_with_masks_and_attributes import (
     ImageWithMasksAndAttributes,
     ImageOfPerson,
+    ImageOfCloth,
 )
 
 import numpy as np
@@ -15,6 +17,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 import torchvision.models as models
+import json
 
 
 def X2conv(in_channels, out_channels, inner_channels=None):
@@ -385,12 +388,27 @@ def load_face_classifier_model():
         model,
         None,
         path=path.join(
-            r.get_path("lasr_vision_feature_extraction"), "models", "model.pth"
+            r.get_path("lasr_vision_feature_extraction"), "models", "face_model.pth"
         ),
         cpu_only=True,
     )
     return model
 
+def load_cloth_classidifer_model():
+    """Build the cloth network and load its weights from the package's models folder.
+
+    The network is a ``SegmentPredictorBbox`` whose ``num_masks`` and
+    ``num_labels`` are both the number of DeepFashion2 attributes plus 4,
+    with ``num_bbox_classes=4``. ``eval()`` is called on it before the
+    weights are loaded from ``models/cloth_model.pth`` inside the
+    ``lasr_vision_feature_extraction`` ROS package with ``cpu_only=True``.
+
+    NOTE(review): the extra 4 outputs presumably correspond to the merged
+    categories (top/down/outwear/dress) -- confirm against the training setup.
+
+    Returns:
+        The model object handed back by ``load_torch_model``.
+    """
+    num_outputs = len(DeepFashion2GeneralizedCategoriesAndAttributes.attributes) + 4
+    model = SegmentPredictorBbox(num_masks=num_outputs, num_labels=num_outputs, num_bbox_classes=4)
+    model.eval()
+
+    weights_path = path.join(
+        rospkg.RosPack().get_path("lasr_vision_feature_extraction"), "models", "cloth_model.pth"
+    )
+    # No optimizer is passed (None); only the first element of the returned tuple is used.
+    model, _, _, _ = load_torch_model(model, None, path=weights_path, cpu_only=True)
+    return model
+
+
+# Correctly spelled alias; the misspelled name above is kept so existing callers keep working.
+load_cloth_classifier_model = load_cloth_classidifer_model
 
 def pad_image_to_even_dims(image):
     # Get the current shape of the image
@@ -444,7 +462,7 @@ def extract_mask_region(frame, mask, expand_x=0.5, expand_y=0.5):
 
 
 def predict_frame(
-    head_frame, torso_frame, full_frame, head_mask, torso_mask, predictor
+    head_frame, torso_frame, full_frame, head_mask, torso_mask, head_predictor, cloth_predictor,
 ):
     full_frame = cv2.cvtColor(full_frame, cv2.COLOR_BGR2RGB)
     head_frame = cv2.cvtColor(head_frame, cv2.COLOR_BGR2RGB)
@@ -453,9 +471,18 @@ def predict_frame(
     head_frame = pad_image_to_even_dims(head_frame)
     torso_frame = pad_image_to_even_dims(torso_frame)
 
-    rst = ImageOfPerson.from_parent_instance(predictor.predict(head_frame))
+    rst_person = ImageOfPerson.from_parent_instance(head_predictor.predict(head_frame)).describe()
+    # The torso crop is classified by the cloth predictor; torso_frame is the image being classified, not a predictor.
+    rst_cloth = ImageOfCloth.from_parent_instance(cloth_predictor.predict(torso_frame)).describe()
+
+    # Results from the two describe() dictionaries are currently merged, but might be separated again
+    # in the future if needed. Each is read through its 'attributes' and 'description' keys.
+    # NOTE(review): assumes ImageOfPerson.describe() returns a mapping under 'attributes' and a string
+    # under 'description', as ImageOfCloth.describe() does -- confirm. On a name clash the cloth value wins.
+    result = {
+        'attributes': {**rst_person['attributes'], **rst_cloth['attributes']},
+        'description': rst_person['description'] + rst_cloth['description'],
+    }
+
+    # Serialised here because the describe() methods return plain dictionaries.
+    result = json.dumps(result, indent=4)
 
-    return rst.describe()
+    return result
 
 
 def load_torch_model(model, optimizer, path="model.pth", cpu_only=False):

diff --git a/...vision_feature_extraction/src/lasr_vision_feature_extraction/categories_and_attributes.py b/...vision_feature_extraction/src/lasr_vision_feature_extraction/categories_and_attributes.py
@@ -167,3 +167,44 @@ class CelebAMaskHQCategoriesAndAttributes(CategoriesAndAttributes):
     thresholds_pred["Wearing_Earrings"] = 0.5
     thresholds_pred["Wearing_Necklace"] = 0.5
     thresholds_pred["Wearing_Necktie"] = 0.5
+
+
+class DeepFashion2GeneralizedCategoriesAndAttributes(CategoriesAndAttributes):
+    """Category, attribute and threshold tables for the cloth classifier.
+
+    Declares 13 fine-grained garment categories, their grouping into the
+    merged categories 'top', 'down', 'outwear' and 'dress', and a default
+    threshold of 0.5 for every mask and prediction entry.
+    """
+    # Fine-grained garment categories.
+    mask_categories = [
+        'short sleeve top', 'long sleeve top', 'short sleeve outwear',
+        'long sleeve outwear', 'vest', 'sling', 'shorts',
+        'trousers', 'skirt', 'short sleeve dress',
+        'long sleeve dress', 'vest dress', 'sling dress'
+    ]
+    # Coarse groups, each listing the fine-grained categories merged into it.
+    merged_categories = {
+        'top': ['short sleeve top', 'long sleeve top', 'vest', 'sling', ],
+        'down': ['shorts', 'trousers', 'skirt', ],
+        'outwear': ['short sleeve outwear', 'long sleeve outwear', ],
+        'dress': ['short sleeve dress', 'long sleeve dress', 'vest dress', 'sling dress', ],
+    }
+    # Flat list of every category that already belongs to some merged group.
+    _categories_to_merge = []
+    for key in sorted(list(merged_categories.keys())):
+        for cat in merged_categories[key]:
+            _categories_to_merge.append(cat)
+    # Any category not covered by a group becomes its own single-member group.
+    # With the tables above every category is already covered, so this adds nothing at present.
+    for key in mask_categories:
+        if key not in _categories_to_merge:
+            merged_categories[key] = [key]
+    # No mask labels, selective, plane or avoided attributes are declared here.
+    mask_labels = []
+    selective_attributes = {}
+    plane_attributes = []
+    avoided_attributes = []
+    # Attribute names; currently the same 13 entries as mask_categories.
+    attributes = [
+        'short sleeve top', 'long sleeve top', 'short sleeve outwear',
+        'long sleeve outwear', 'vest', 'sling', 'shorts',
+        'trousers', 'skirt', 'short sleeve dress',
+        'long sleeve dress', 'vest dress', 'sling dress'
+    ]
+
+    # Threshold tables, filled in by the loops below.
+    thresholds_mask: dict[str, float] = {}
+    thresholds_pred: dict[str, float] = {}
+
+    # set default thresholds:
+    # thresholds_mask is keyed by merged-category name; thresholds_pred by attribute / mask-label name.
+    # NOTE(review): mask_labels is empty, so thresholds_pred has no entries for the merged names
+    # ('top', 'down', 'outwear', 'dress') -- confirm nothing looks those names up in thresholds_pred.
+    for key in sorted(merged_categories.keys()):
+        thresholds_mask[key] = 0.5
+    for key in attributes + mask_labels:
+        thresholds_pred[key] = 0.5
diff --git a/..._feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py b/..._feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
@@ -2,7 +2,6 @@
 from lasr_vision_feature_extraction.categories_and_attributes import (
     CategoriesAndAttributes,
 )
-import json
 
 
 def _softmax(x: list[float]) -> list[float]:
@@ -205,6 +204,60 @@ def describe(self) -> str:
             "description": description,
         }
 
-        result = json.dumps(result, indent=4)
+        return result
+
+
+class ImageOfCloth(ImageWithMasksAndAttributes):
+    """Image of a clothing region with its masks and per-label attribute scores."""
+
+    # Labels reported by describe(): the four merged groups first, then the fine-grained categories.
+    _DESCRIBED_LABELS = (
+        "top", "down", "outwear", "dress",
+        "short sleeve top", "long sleeve top", "short sleeve outwear", "long sleeve outwear",
+        "vest", "sling", "shorts", "trousers", "skirt",
+        "short sleeve dress", "long sleeve dress", "vest dress", "sling dress",
+    )
+
+    def __init__(
+        self,
+        image: np.ndarray,
+        masks: dict[str, np.ndarray],
+        attributes: dict[str, float],
+        categories_and_attributes: CategoriesAndAttributes,
+    ):
+        super().__init__(image, masks, attributes, categories_and_attributes)
+
+    @classmethod
+    def from_parent_instance(
+        cls, parent_instance: ImageWithMasksAndAttributes
+    ) -> "ImageOfCloth":
+        """
+        Creates an instance of ImageOfCloth using the properties of an
+        instance of ImageWithMasksAndAttributes.
+        """
+        return cls(
+            image=parent_instance.image,
+            masks=parent_instance.masks,
+            attributes=parent_instance.attributes,
+            categories_and_attributes=parent_instance.categories_and_attributes,
+        )
+
+    def describe(self) -> dict:
+        """Report, for each described label, whether its score exceeds its threshold.
+
+        Returns:
+            A dictionary with two keys: ``"attributes"`` maps every label in
+            ``_DESCRIBED_LABELS`` to ``self.attributes[label] >
+            thresholds_pred[label]``, and ``"description"`` holds a fixed
+            placeholder string.
+
+        Raises:
+            KeyError: if a described label is missing from ``self.attributes``
+                or from ``categories_and_attributes.thresholds_pred``.
+        """
+        thresholds = self.categories_and_attributes.thresholds_pred
+        result = {
+            # Driven by a plain label table for now; likely to gain more logic combined with a
+            # classifier of more specific cloth classes.
+            "attributes": {
+                label: self.attributes[label] > thresholds[label]
+                for label in self._DESCRIBED_LABELS
+            },
+            "description": "this descrcription will be completed if we find out it is better to do it here.",
+        }
 
         return result
+