LASR-at-Home · MBTMBTMBT · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024
diff --git a/common/helpers/navigation_helpers/src/navigation_helpers/__init__.py b/common/helpers/navigation_helpers/src/navigation_helpers/__init__.py
@@ -1,9 +1,18 @@
+import rospy
+
+
 from geometry_msgs.msg import (
     Point,
     Pose,
+    PoseStamped,
+    Quaternion,
 )
+from nav_msgs.srv import GetPlan
+from nav_msgs.msg import Path
 
 import numpy as np
+import math
+from scipy.spatial.transform import Rotation as R
 from itertools import permutations
 
 from typing import Union, List
@@ -27,3 +36,49 @@ def min_hamiltonian_path(start: Pose, poses: List[Pose]) -> Union[None, List[Pos
             best_order = list(perm)
 
     return best_order
+
+
+def get_pose_on_path(
+    p1: PoseStamped, p2: PoseStamped, dist_to_goal: float = 1.0, tolerance: float = 0.5
+) -> Union[None, PoseStamped]:
+    """Pick a pose on the planned path from p1 to p2 that stops short of p2.
+
+    Requests a plan from /move_base/make_plan (goal tolerance `tolerance`) and
+    scans it from the last pose backwards, returning the first pose whose
+    euclidian_distance to p2 is >= dist_to_goal. Returns None if either pose is
+    not in the "map" frame, the service is unavailable or fails, or no pose
+    in the plan qualifies.
+    """
+    make_plan: rospy.ServiceProxy = rospy.ServiceProxy("/move_base/make_plan", GetPlan)
+    rospy.loginfo(f"Getting plan from {p1} to {p2}.")
+
+    # Check each frame on its own: the chained `a != b != "map"` never compared
+    # p1 with "map", so two equal non-map frames (or p2 alone in map) got through.
+    if p1.header.frame_id != "map" or p2.header.frame_id != "map":
+        rospy.loginfo(
+            f"Frames of reference are not 'map' ({p1.header.frame_id} and {p2.header.frame_id})."
+        )
+        return None
+    try:
+        make_plan.wait_for_service(timeout=rospy.Duration.from_sec(10.0))
+    except rospy.ROSException:
+        rospy.loginfo("Service /move_base/make_plan not available.")
+        return None
+    try:
+        plan: Path = make_plan(p1, p2, tolerance).plan
+    except rospy.ServiceException as e:
+        rospy.loginfo(e)
+        return None
+    rospy.loginfo(f"Got plan with {len(plan.poses)} poses.")
+    for pose in reversed(plan.poses):
+        if euclidian_distance(pose.pose.position, p2.pose.position) >= dist_to_goal:
+            return pose
+    return None
+
+
+def compute_face_quat(p1: Pose, p2: Pose) -> Quaternion:
+    """Return the rotation about z whose yaw is the x-y heading from p1 to p2."""
+    # atan2 already yields radians, which from_euler takes by default, so no
+    # degree round-trip is needed; only the x and y position fields are read.
+    yaw = math.atan2(p2.position.y - p1.position.y, p2.position.x - p1.position.x)
+    return Quaternion(*R.from_euler("z", yaw).as_quat())
diff --git a/common/speech/lasr_speech_recognition_whisper/nodes/transcribe_microphone_server b/common/speech/lasr_speech_recognition_whisper/nodes/transcribe_microphone_server
@@ -87,6 +87,7 @@ class TranscribeSpeechAction(object):
         self._listening = False
 
         self._action_server.start()
+        rospy.loginfo(f"Speech Action server {self._action_name} started")
 
     def _configure_microphone(self) -> sr.Microphone:
         """Configures the microphone for listening to speech based on the
@@ -332,8 +333,8 @@ def configure_model_params(config: dict) -> speech_model_params:
         model_params.mic_device = config["mic_device"]
     if config["no_warmup"]:
         model_params.warmup = False
-    if config["energy_threshold"]:
-        model_params.energy_threshold = config["energy_threshold"]
+    # if config["energy_threshold"]:
+    #     model_params.energy_threshold = config["energy_threshold"]
     if config["pause_threshold"]:
         model_params.pause_threshold = config["pause_threshold"]
 

diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py b/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py
@@ -28,6 +28,7 @@ def main():
     args = parse_args()
 
     recognizer = sr.Recognizer()
+    recognizer.pause_threshold = 2
     microphone = sr.Microphone(device_index=args["device_index"], sample_rate=16000)
     threshold = 100
     recognizer.dynamic_energy_threshold = False
@@ -39,7 +40,9 @@ def main():
     while transcription_result != "":
         print(f"Listening...")
         with microphone as source:
-            wav_data = recognizer.listen(source).get_wav_data()
+            wav_data = recognizer.listen(
+                source, phrase_time_limit=10, timeout=5
+            ).get_wav_data()
         print(f"Processing...")
         # Magic number 32768.0 is the maximum value of a 16-bit signed integer
         float_data = (

diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/repeat_after_me.py b/common/speech/lasr_speech_recognition_whisper/scripts/repeat_after_me.py
@@ -18,6 +18,7 @@
 
 if USE_ACTIONLIB:
     client = actionlib.SimpleActionClient("transcribe_speech", TranscribeSpeechAction)
+    rospy.loginfo("Waiting for server...")
     client.wait_for_server()
     repeating = False
     rospy.loginfo("Done waiting")

diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py b/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py
@@ -34,9 +34,10 @@ def main(args: dict) -> None:
     output_dir = args["output_dir"]
 
     r = sr.Recognizer()
-    with sr.Microphone(device_index=13, sample_rate=16000) as source:
+    r.pause_threshold = 2
+    with sr.Microphone(device_index=9, sample_rate=16000) as source:
         print("Say something!")
-        audio = r.listen(source, timeout=5, phrase_time_limit=5)
+        audio = r.listen(source, timeout=5, phrase_time_limit=10)
         print("Finished listening")
 
     with open(os.path.join(output_dir, "microphone.raw"), "wb") as f:

diff --git a/...sion/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py b/...sion/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py
@@ -171,10 +171,10 @@ def _3d_bbox_crop(
         )
         for det in detections
     ]
+
     if crop_method == "closest":
         detections = [det for _, det in sorted(zip(distances, detections))]
         distances.sort()
-
     elif crop_method == "furthest":
         detections = [
             det for _, det in sorted(zip(distances, detections), reverse=True)

diff --git a/common/vision/lasr_vision_feature_extraction/nodes/service b/common/vision/lasr_vision_feature_extraction/nodes/service
@@ -4,8 +4,8 @@ from lasr_vision_msgs.srv import (
     TorchFaceFeatureDetectionDescriptionResponse,
 )
 from lasr_vision_feature_extraction.categories_and_attributes import (
-    CategoriesAndAttributes,
-    CelebAMaskHQCategoriesAndAttributes,
+    # CategoriesAndAttributes,
+    # CelebAMaskHQCategoriesAndAttributes,
     DeepFashion2GeneralizedCategoriesAndAttributes,
 )
 

diff --git a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/__init__.py b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/__init__.py
@@ -1,6 +1,5 @@
 import json
 from os import path
-
 import cv2
 import numpy as np
 import rospkg
@@ -11,12 +10,10 @@
 import torchvision.models as models
 from lasr_vision_feature_extraction.categories_and_attributes import (
     CategoriesAndAttributes,
-    CelebAMaskHQCategoriesAndAttributes,
     DeepFashion2GeneralizedCategoriesAndAttributes,
 )
 from lasr_vision_feature_extraction.image_with_masks_and_attributes import (
     ImageWithMasksAndAttributes,
-    ImageOfPerson,
     ImageOfCloth,
 )
 from lasr_vision_msgs.srv import Vqa, VqaRequest
@@ -55,129 +52,6 @@ def forward(self, x_copy, x):
         return x
 
 
-class UNetWithResnetEncoder(nn.Module):
-    def __init__(self, num_classes, in_channels=3, freeze_bn=False, sigmoid=True):
-        super(UNetWithResnetEncoder, self).__init__()
-        self.sigmoid = sigmoid
-        self.resnet = models.resnet34(
-            pretrained=False
-        )  # Initialize with a ResNet model
-        if in_channels != 3:
-            self.resnet.conv1 = nn.Conv2d(
-                in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
-            )
-
-        self.encoder1 = nn.Sequential(
-            self.resnet.conv1, self.resnet.bn1, self.resnet.relu
-        )
-        self.encoder2 = self.resnet.layer1
-        self.encoder3 = self.resnet.layer2
-        self.encoder4 = self.resnet.layer3
-        self.encoder5 = self.resnet.layer4
-
-        self.up1 = Decoder(512, 256, 256)
-        self.up2 = Decoder(256, 128, 128)
-        self.up3 = Decoder(128, 64, 64)
-        self.up4 = Decoder(64, 64, 64)
-
-        self.final_conv = nn.Conv2d(64, num_classes, kernel_size=1)
-        self._initialize_weights()
-
-        if freeze_bn:
-            self.freeze_bn()
-
-    def _initialize_weights(self):
-        for module in self.modules():
-            if isinstance(module, nn.Conv2d) or isinstance(module, nn.ConvTranspose2d):
-                nn.init.kaiming_normal_(module.weight)
-                if module.bias is not None:
-                    module.bias.data.zero_()
-                elif isinstance(module, nn.BatchNorm2d):
-                    module.weight.data.fill_(1)
-                    module.bias.data.zero_()
-
-    def forward(self, x):
-        x1 = self.encoder1(x)
-        x2 = self.encoder2(x1)
-        x3 = self.encoder3(x2)
-        x4 = self.encoder4(x3)
-        x5 = self.encoder5(x4)
-
-        x = self.up1(x4, x5)
-        x = self.up2(x3, x)
-        x = self.up3(x2, x)
-        x = self.up4(x1, x)
-        x = F.interpolate(
-            x, size=(x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
-        )
-
-        x = self.final_conv(x)
-
-        if self.sigmoid:
-            x = torch.sigmoid(x)
-        return x
-
-    def freeze_bn(self):
-        for module in self.modules():
-            if isinstance(module, nn.BatchNorm2d):
-                module.eval()
-
-    def unfreeze_bn(self):
-        for module in self.modules():
-            if isinstance(module, nn.BatchNorm2d):
-                module.train()
-
-
-class MultiLabelResNet(nn.Module):
-    def __init__(self, num_labels, input_channels=3, sigmoid=True):
-        super(MultiLabelResNet, self).__init__()
-        self.model = models.resnet34(pretrained=False)
-        self.sigmoid = sigmoid
-
-        if input_channels != 3:
-            self.model.conv1 = nn.Conv2d(
-                input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
-            )
-
-        num_ftrs = self.model.fc.in_features
-
-        self.model.fc = nn.Linear(num_ftrs, num_labels)
-
-    def forward(self, x):
-        x = self.model(x)
-        if self.sigmoid:
-            x = torch.sigmoid(x)
-        return x
-
-
-class CombinedModel(nn.Module):
-    def __init__(
-        self, segment_model: nn.Module, predict_model: nn.Module, cat_layers: int = None
-    ):
-        super(CombinedModel, self).__init__()
-        self.segment_model = segment_model
-        self.predict_model = predict_model
-        self.cat_layers = cat_layers
-        self.freeze_seg = False
-
-    def forward(self, x: torch.Tensor):
-        seg_masks = self.segment_model(x)
-        seg_masks_ = seg_masks.detach()
-        if self.cat_layers:
-            seg_masks_ = seg_masks_[:, 0 : self.cat_layers]
-            x = torch.cat((x, seg_masks_), dim=1)
-        else:
-            x = torch.cat((x, seg_masks_), dim=1)
-        logic_outputs = self.predict_model(x)
-        return seg_masks, logic_outputs
-
-    def freeze_segment_model(self):
-        self.segment_model.eval()
-
-    def unfreeze_segment_model(self):
-        self.segment_model.train()
-
-
 class SegmentPredictor(nn.Module):
     def __init__(self, num_masks, num_labels, in_channels=3, sigmoid=True):
         super(SegmentPredictor, self).__init__()
@@ -445,32 +319,6 @@ def predict(self, rgb_image: np.ndarray) -> ImageWithMasksAndAttributes:
         return image_obj
 
 
-def load_face_classifier_model():
-    cat_layers = CelebAMaskHQCategoriesAndAttributes.merged_categories.keys().__len__()
-    segment_model = UNetWithResnetEncoder(num_classes=cat_layers)
-    predictions = (
-        len(CelebAMaskHQCategoriesAndAttributes.attributes)
-        - len(CelebAMaskHQCategoriesAndAttributes.avoided_attributes)
-        + len(CelebAMaskHQCategoriesAndAttributes.mask_labels)
-    )
-    predict_model = MultiLabelResNet(
-        num_labels=predictions, input_channels=cat_layers + 3
-    )
-    model = CombinedModel(segment_model, predict_model, cat_layers=cat_layers)
-    model.eval()
-
-    r = rospkg.RosPack()
-    model, _, _, _ = load_torch_model(
-        model,
-        None,
-        path=path.join(
-            r.get_path("lasr_vision_feature_extraction"), "models", "face_model.pth"
-        ),
-        cpu_only=True,
-    )
-    return model
-
-
 def load_cloth_classifier_model():
     num_classes = len(DeepFashion2GeneralizedCategoriesAndAttributes.attributes)
     model = SegmentPredictorBbox(
@@ -597,7 +445,12 @@ def predict_frame(
         rst_person["hair_shape"] = "long hair"
 
     result = {
-        **rst_person,
+        "has_hair": 0.0,
+        "hair_colour": "Not used.",
+        "hair_shape": "Not used.",
+        "facial_hair": 0.0,
+        "glasses": 0.0,
+        "hat": 0.0,
         **rst_cloth,
     }