From 6282d2d7843b64edea13fcf755826af3eed2605d Mon Sep 17 00:00:00 2001
From: Benteng Ma
Date: Fri, 2 Feb 2024 15:24:54 +0000
Subject: [PATCH] Make xyz coordinates readable.

---
 .../helpers/cv2_img/src/cv2_img/__init__.py   | 39 ++++++++++++++
 .../src/lasr_vision_bodypix/bodypix.py        | 14 ++---
 skills/src/lasr_skills/describe_people.py     | 28 ++++++----
 skills/src/lasr_skills/vision/__init__.py     |  4 +-
 skills/src/lasr_skills/vision/get_image.py    | 53 ++++++++++++++++++-
 .../lasr_skills/vision/image_msg_to_cv2.py    | 22 +++++++-
 tasks/receptionist/src/receptionist/sm.py     |  1 -
 .../receptionist/states/speakdescriptions.py  | 16 +++---
 8 files changed, 146 insertions(+), 31 deletions(-)

diff --git a/common/helpers/cv2_img/src/cv2_img/__init__.py b/common/helpers/cv2_img/src/cv2_img/__init__.py
index 822b2839a..ad0c74b74 100644
--- a/common/helpers/cv2_img/src/cv2_img/__init__.py
+++ b/common/helpers/cv2_img/src/cv2_img/__init__.py
@@ -1,5 +1,6 @@
 import rospy
 import numpy as np
+import ros_numpy as rnp
 
 from PIL import Image
 from sensor_msgs.msg import Image as SensorImage
@@ -64,9 +65,47 @@ def msg_to_cv2_img(msg: SensorImage):
     img = np.array(img)
     img = img[:, :, ::-1].copy()
 
+    rospy.logwarn('shape of the image: ' + str(img.shape))
+
     return img
 
 
+# pcl_xyz = rnp.point_cloud2.pointcloud2_to_xyz_array(pcl_msg, remove_nans=False)
+def pcl_msg_to_xyz(pcl_msg):
+    pcl_xyz = rnp.point_cloud2.pointcloud2_to_xyz_array(pcl_msg, remove_nans=False)
+
+    rospy.logwarn('shape of the xyz array: ' + str(pcl_xyz.shape))
+
+    return pcl_xyz
+
+
+def pcl_msg_to_cv2(pcl_msg):
+    """
+    Constructs a cv2 image from a PointCloud2 message.
+
+    Parameters
+    ----------
+    pcl_msg : sensor_msgs/PointCloud2
+        Input pointcloud (organised)
+
+    Returns
+    -------
+    np.array : cv2 image
+    """
+
+    # Extract the packed rgb bytes from the pointcloud
+    frame = np.frombuffer(pcl_msg.data, dtype=np.uint8)
+    frame = frame.reshape(pcl_msg.height, pcl_msg.width, 32)  # one 32-byte struct per point
+    frame = frame[:, :, 16:19]  # bytes 16:19 hold the colour channels
+
+    # Ensure array is contiguous
+    frame = np.ascontiguousarray(frame, dtype=np.uint8)
+
+    rospy.logwarn('shape of the image: ' + str(frame.shape))
+
+    return frame
+
+
 def extract_mask_region(frame, mask, expand_x=0.5, expand_y=0.5):
     """
    Extracts the face region from the image and expands the region by the specified amount.
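
Taken together, pcl_msg_to_cv2 and pcl_msg_to_xyz turn one organised cloud into a colour image plus per-pixel metric coordinates. A minimal usage sketch (hypothetical, not part of the patch; it assumes the xtion topic and that the cv2_img helper package is importable):

    import rospy
    from sensor_msgs.msg import PointCloud2
    import cv2_img

    rospy.init_node('pcl_probe')
    pcl_msg = rospy.wait_for_message('/xtion/depth_registered/points', PointCloud2)

    frame = cv2_img.pcl_msg_to_cv2(pcl_msg)  # (height, width, 3) uint8 colour image
    xyz = cv2_img.pcl_msg_to_xyz(pcl_msg)    # (height, width, 3) float, NaN where depth is missing

    row, col = frame.shape[0] // 2, frame.shape[1] // 2
    x, y, z = xyz[row, col]  # metres, in the camera's optical frame
    rospy.loginfo('centre pixel sits %.2f m from the camera' % z)
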
diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
index fd62c29b3..e4765814b 100644
--- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
+++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
@@ -95,16 +95,16 @@ def detect(request: BodyPixDetectionRequest, debug_publisher: rospy.Publisher |
         neck_y = right_shoulder.y
         neck_score = right_shoulder_keypoint.score
 
-        # Convert neck coordinates to relative positions (0-1)
-        rel_neck_x = neck_x / img_width
-        rel_neck_y = neck_y / img_height
+        # # Convert neck coordinates to relative positions (0-1)
+        # rel_neck_x = neck_x / img_width
+        # rel_neck_y = neck_y / img_height
 
-        pose = BodyPixPose()
-        pose.coord = np.array([rel_neck_x, rel_neck_y]).astype(np.float32)
-        neck_coordinates.append(pose)
+        # pose = BodyPixPose()
+        # pose.coord = np.array([rel_neck_x, rel_neck_y]).astype(np.float32)
+        # neck_coordinates.append(pose)
 
         pose = BodyPixPose()
-        pose.coord = np.array([neck_x, neck_y]).astype(np.float32)
+        pose.coord = np.array([neck_x, neck_y]).astype(np.int32)  # keep pixel coordinates integral
         neck_coordinates.append(pose)
 
         # publish to debug topic
diff --git a/skills/src/lasr_skills/describe_people.py b/skills/src/lasr_skills/describe_people.py
index cfe8a52a2..eb38c9b2d 100644
--- a/skills/src/lasr_skills/describe_people.py
+++ b/skills/src/lasr_skills/describe_people.py
@@ -11,7 +11,7 @@
 from lasr_vision_msgs.srv import YoloDetection, BodyPixDetection, TorchFaceFeatureDetection
 from numpy2message import numpy2message
 
-from .vision import GetImage, ImageMsgToCv2
+from .vision import GetImage, ImageMsgToCv2, Get3DImage, PclMsgToCv2, Get2DAnd3DImages
 
 
 class DescribePeople(smach.StateMachine):
@@ -21,16 +21,21 @@ def __init__(self):
             self, outcomes=['succeeded', 'failed'], input_keys=[], output_keys=['people'])
 
         with self:
-            smach.StateMachine.add('GET_IMAGE', GetImage(), transitions={
+            # smach.StateMachine.add('GET_IMAGE', GetImage(), transitions={
+            #     'succeeded': 'CONVERT_IMAGE'})
+            # smach.StateMachine.add('CONVERT_IMAGE', ImageMsgToCv2(), transitions={
+            #     'succeeded': 'SEGMENT'})
+
+            smach.StateMachine.add('GET_IMAGE', Get2DAnd3DImages(), transitions={
                 'succeeded': 'CONVERT_IMAGE'})
-            smach.StateMachine.add('CONVERT_IMAGE', ImageMsgToCv2(), transitions={
+            smach.StateMachine.add('CONVERT_IMAGE', PclMsgToCv2(), transitions={
                 'succeeded': 'SEGMENT'})
 
             sm_con = smach.Concurrence(outcomes=['succeeded', 'failed'],
                                        default_outcome='failed',
                                        outcome_map={'succeeded': {
                                            'SEGMENT_YOLO': 'succeeded', 'SEGMENT_BODYPIX': 'succeeded'}},
-                                       input_keys=['img', 'img_msg'],
+                                       input_keys=['img', 'img_msg_2d', 'img_msg_3d', 'xyz'],
                                        output_keys=['people_detections', 'bodypix_masks'])
 
             with sm_con:
@@ -51,13 +56,13 @@ class SegmentYolo(smach.State):
 
     def __init__(self):
         smach.State.__init__(self, outcomes=['succeeded', 'failed'], input_keys=[
-                             'img_msg'], output_keys=['people_detections'])
+                             'img_msg_2d'], output_keys=['people_detections'])
         self.yolo = rospy.ServiceProxy('/yolov8/detect', YoloDetection)
 
     def execute(self, userdata):
         try:
             result = self.yolo(
-                userdata.img_msg, "yolov8n-seg.pt", 0.5, 0.3)
+                userdata.img_msg_2d, "yolov8n-seg.pt", 0.5, 0.3)
             userdata.people_detections = [
                 det for det in result.detected_objects if det.name == "person"]
             return 'succeeded'
@@ -74,7 +79,7 @@ class SegmentBodypix(smach.State):
 
     def __init__(self):
         smach.State.__init__(self, outcomes=['succeeded', 'failed'], input_keys=[
-                             'img_msg'], output_keys=['bodypix_masks'])
+                             'img_msg_2d', 'xyz'], output_keys=['bodypix_masks'])
         self.bodypix = rospy.ServiceProxy(
             '/bodypix/detect', BodyPixDetection)
 
@@ -88,11 +93,14 @@ def execute(self, userdata):
 
             masks = [torso, head]
 
-            result = self.bodypix(userdata.img_msg, "resnet50", 0.7, masks)
+            result = self.bodypix(userdata.img_msg_2d, "resnet50", 0.7, masks)
             userdata.bodypix_masks = result.masks
             rospy.loginfo("Found:::%s" % str(len(result.poses)))
-            neck_coord = (result.poses[0].coord[0], result.poses[0].coord[1])
-            rospy.loginfo("COORD:::%s" % str(neck_coord))
+            neck_coord = (int(result.poses[0].coord[0]), int(result.poses[0].coord[1]))
+            rospy.loginfo("COORD_XY:::%s" % str(neck_coord))
+            xyz = userdata.xyz  # organised (height, width, 3) array from the point cloud
+            z = xyz[neck_coord[1], neck_coord[0], 2]  # numpy indexes [row, col], i.e. [y, x]; channel 2 is z
+            rospy.loginfo("COORD_Z:::%s" % str(z))
             return 'succeeded'
         except rospy.ServiceException as e:
             rospy.logwarn(f"Unable to perform inference. ({str(e)})")
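
Reading a single cell of the xyz array is brittle: the depth return at that exact pixel is often NaN. A hedged alternative (an assumption, not something this patch does) is to take the median z over a small window around the keypoint, again indexing the organised array as [row, col]:

    import numpy as np

    def depth_at(xyz, coord, k=3):
        """Median z in a (2k+1)x(2k+1) window around coord = (x, y) in pixels.

        xyz is the organised (height, width, 3) array; the result is NaN only
        when the whole window has no depth returns.
        """
        x, y = coord
        window = xyz[max(y - k, 0):y + k + 1, max(x - k, 0):x + k + 1, 2]
        return float(np.nanmedian(window))
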
diff --git a/skills/src/lasr_skills/vision/__init__.py b/skills/src/lasr_skills/vision/__init__.py
index 53bf0eb7b..127fbc849 100644
--- a/skills/src/lasr_skills/vision/__init__.py
+++ b/skills/src/lasr_skills/vision/__init__.py
@@ -1,3 +1,3 @@
-from .get_image import GetImage
+from .get_image import GetImage, Get3DImage, Get2DAnd3DImages
 from .image_cv2_to_msg import ImageCv2ToMsg
-from .image_msg_to_cv2 import ImageMsgToCv2
+from .image_msg_to_cv2 import ImageMsgToCv2, PclMsgToCv2
diff --git a/skills/src/lasr_skills/vision/get_image.py b/skills/src/lasr_skills/vision/get_image.py
index c0543c6e1..edad9c078 100644
--- a/skills/src/lasr_skills/vision/get_image.py
+++ b/skills/src/lasr_skills/vision/get_image.py
@@ -1,7 +1,7 @@
 import os
 import smach
 import rospy
-from sensor_msgs.msg import Image
+from sensor_msgs.msg import Image, PointCloud2
 
 # PointCloud2
 # topic: /xtion/depth_registered/points
@@ -28,3 +28,54 @@ def execute(self, userdata):
             return 'failed'
 
         return 'succeeded'
+
+
+class Get3DImage(smach.State):
+    """
+    State for reading a sensor_msgs PointCloud2 message
+    """
+
+    def __init__(self, topic: str = None):
+        smach.State.__init__(
+            self, outcomes=['succeeded', 'failed'], output_keys=['img_msg'])
+
+        if topic is None:
+            self.topic = '/xtion/depth_registered/points'
+        else:
+            self.topic = topic
+
+    def execute(self, userdata):
+        try:
+            userdata.img_msg = rospy.wait_for_message(
+                self.topic, PointCloud2)
+        except Exception as e:
+            print(e)
+            return 'failed'
+
+        return 'succeeded'
+
+
+class Get2DAnd3DImages(smach.State):
+    """
+    State for reading an Image and a PointCloud2 message
+    """
+
+    def __init__(self, topic: str = None):  # topic is currently unused; both topics are fixed below
+        smach.State.__init__(
+            self, outcomes=['succeeded', 'failed'], output_keys=['img_msg_2d', 'img_msg_3d'])
+
+        self.topic1 = '/xtion/rgb/image_raw'
+        self.topic2 = '/xtion/depth_registered/points'
+
+    def execute(self, userdata):
+        try:
+            userdata.img_msg_2d = rospy.wait_for_message(
+                self.topic1, Image)
+            userdata.img_msg_3d = rospy.wait_for_message(
+                self.topic2, PointCloud2)
+        except Exception as e:
+            print(e)
+            return 'failed'
+
+        return 'succeeded'
+
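
Two back-to-back wait_for_message calls can return an RGB frame and a cloud captured at noticeably different instants. If that drift ever matters, a hedged alternative (an assumption, not in this patch; it requires an already-initialised node) is to ask message_filters for an approximately time-aligned pair:

    import message_filters
    import rospy
    from sensor_msgs.msg import Image, PointCloud2

    def wait_for_synced_pair(rgb_topic='/xtion/rgb/image_raw',
                             pcl_topic='/xtion/depth_registered/points',
                             slop=0.1):
        """Block until one Image/PointCloud2 pair arrives within `slop` seconds."""
        result = {}

        def callback(img_msg, pcl_msg):
            result['pair'] = (img_msg, pcl_msg)

        subs = [message_filters.Subscriber(rgb_topic, Image),
                message_filters.Subscriber(pcl_topic, PointCloud2)]
        sync = message_filters.ApproximateTimeSynchronizer(subs, queue_size=5, slop=slop)
        sync.registerCallback(callback)
        while 'pair' not in result and not rospy.is_shutdown():
            rospy.sleep(0.01)
        for sub in subs:
            sub.unregister()  # message_filters.Subscriber forwards this to rospy
        return result.get('pair')
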
diff --git a/skills/src/lasr_skills/vision/image_msg_to_cv2.py b/skills/src/lasr_skills/vision/image_msg_to_cv2.py
index e079b2e62..7ec81caa3 100644
--- a/skills/src/lasr_skills/vision/image_msg_to_cv2.py
+++ b/skills/src/lasr_skills/vision/image_msg_to_cv2.py
@@ -1,6 +1,7 @@
 import os
 import smach
 import cv2_img
+import rospy
 
 
 class ImageMsgToCv2(smach.State):
@@ -10,8 +11,25 @@ class ImageMsgToCv2(smach.State):
 
     def __init__(self):
         smach.State.__init__(
-            self, outcomes=['succeeded'], input_keys=['img_msg'], output_keys=['img'])
+            self, outcomes=['succeeded'], input_keys=['img_msg_2d'], output_keys=['img'])
 
     def execute(self, userdata):
-        userdata.img = cv2_img.msg_to_cv2_img(userdata.img_msg)
+        userdata.img = cv2_img.msg_to_cv2_img(userdata.img_msg_2d)
+        return 'succeeded'
+
+
+class PclMsgToCv2(smach.State):
+    """
+    State for converting a PointCloud2 message to a cv2 image and an xyz array
+    """
+
+    def __init__(self):
+        smach.State.__init__(
+            self, outcomes=['succeeded'], input_keys=['img_msg_3d'], output_keys=['img', 'xyz'])
+
+    def execute(self, userdata):
+        userdata.img = cv2_img.pcl_msg_to_cv2(userdata.img_msg_3d)
+        userdata.xyz = cv2_img.pcl_msg_to_xyz(userdata.img_msg_3d)
+        rospy.logwarn('succeeded on image conversion.')
+        # print('succeeded on image conversion.')
         return 'succeeded'
diff --git a/tasks/receptionist/src/receptionist/sm.py b/tasks/receptionist/src/receptionist/sm.py
index 3daddbfdf..591060211 100755
--- a/tasks/receptionist/src/receptionist/sm.py
+++ b/tasks/receptionist/src/receptionist/sm.py
@@ -32,6 +32,5 @@ def __init__(self):
         # smach.StateMachine.add('SPEAK_DESCRIPTIONS', SpeakDescriptions(self.default), transitions={'failed':'GO_TO_SEATING_AREA','succeeded':'GO_TO_SEATING_AREA'})
         # smach.StateMachine.add('GO_TO_SEATING_AREA', GoToSeatingArea(self.default), transitions={'succeeded' : 'LOOK_FOR_SEATS'})
         # smach.StateMachine.add('LOOK_FOR_SEATS', LookForSeats(self.default), transitions={'succeeded' : 'GO_TO_WAIT_FOR_PERSON'})
-        # smach.StateMachine.add('END', End(self.default),transitions={'succeeded':'succeeded'})
         smach.StateMachine.add('DESCRIBE_PEOPLE', DescribePeople(),transitions={'succeeded':'SPEAK_DESCRIPTIONS','failed':'DESCRIBE_PEOPLE'})
         smach.StateMachine.add('SPEAK_DESCRIPTIONS', SpeakDescriptions(self.default), transitions={'failed':'DESCRIBE_PEOPLE','succeeded':'DESCRIBE_PEOPLE'})
diff --git a/tasks/receptionist/src/receptionist/states/speakdescriptions.py b/tasks/receptionist/src/receptionist/states/speakdescriptions.py
index bbf9d5d91..42e7546e8 100644
--- a/tasks/receptionist/src/receptionist/states/speakdescriptions.py
+++ b/tasks/receptionist/src/receptionist/states/speakdescriptions.py
@@ -8,15 +8,15 @@ def __init__(self, default):
         self.default = default
 
     def execute(self, userdata):
-        for person in userdata['people']:
-            self.default.voice.speak('I see a person')
+        # for person in userdata['people']:
+        #     self.default.voice.speak('I see a person')
 
-            for feature in person['features']:
-                if feature.label:
-                    if len(feature.colours) == 0:
-                        self.default.voice.speak(f'They have {feature.name}.')
-                        continue
+        #     for feature in person['features']:
+        #         if feature.label:
+        #             if len(feature.colours) == 0:
+        #                 self.default.voice.speak(f'They have {feature.name}.')
+        #                 continue
 
-                    self.default.voice.speak(f'They have {feature.name} and it has the colour {feature.colours[0]}')
+        #         self.default.voice.speak(f'They have {feature.name} and it has the colour {feature.colours[0]}')
 
         return 'succeeded'
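
If the speech loop is re-enabled later, guarding the colour lookup keeps the state from indexing feature.colours[0] when no colour was detected. A hedged sketch of that logic as a standalone helper (hypothetical name; same feature fields as the commented-out code above, with `voice` standing in for self.default.voice):

    def describe(person_features, voice):
        """Speak one person's features, skipping unlabelled ones."""
        voice.speak('I see a person')
        for feature in person_features:
            if not feature.label:
                continue
            if feature.colours:
                voice.speak(f'They have {feature.name} and it has the colour {feature.colours[0]}')
            else:
                voice.speak(f'They have {feature.name}.')
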