From 953d35c7851b6117aa5b77fa26fb8df9fc26e8c0 Mon Sep 17 00:00:00 2001
From: Matt Barker <105945282+m-barker@users.noreply.github.com>
Date: Tue, 14 May 2024 10:47:43 +0100
Subject: [PATCH 1/3] Gesture detection (#202)
* fix: incorrect remapping keys for handover
* fix: remove speech server in launch file
* feat: add find gesture person skill, with support for raised arms
* feat: add pointing to gesture detection (a usage sketch of the updated skill follows after this list)
* feat: add debug publisher to gesture detection
* feat: add mimicking of the gesture detected by Tiago
* feat: update CML with new pointing API
* feat: update GPSR API with new gesture detection skill
* chore: remove old pointing skill & service
* feat: make debug always be published
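
A minimal usage sketch of the reworked DetectGesture skill (illustrative only,
not part of the diff; it assumes the BodyPix service at /bodypix/detect is
running and the lasr_skills package is built; the node name and wiring are
made up for illustration):

    import rospy
    import smach
    from lasr_skills import DetectGesture
    from lasr_skills.vision import GetImage

    rospy.init_node("gesture_detection_example")

    sm = smach.StateMachine(outcomes=["succeeded", "failed"])
    with sm:
        # Grab a single image from the robot's camera.
        smach.StateMachine.add(
            "GET_IMAGE",
            GetImage(),
            transitions={"succeeded": "DETECT_GESTURE", "failed": "failed"},
        )
        # gesture_to_detect=None lets the skill infer the gesture (raised arm
        # or pointing direction) from the BodyPix keypoints.
        smach.StateMachine.add(
            "DETECT_GESTURE",
            DetectGesture(gesture_to_detect=None),
            transitions={"succeeded": "succeeded", "failed": "failed"},
        )

    sm.execute()
    print("Detected gesture:", sm.userdata.gesture_detected)
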
---
.../lasr_vision_pointing/CMakeLists.txt | 213 -----------------
.../lasr_vision_pointing/images/.gitkeep | 0
.../vision/lasr_vision_pointing/package.xml | 77 ------
.../lasr_vision_pointing/requirements.in | 1 -
.../lasr_vision_pointing/requirements.txt | 24 --
.../scripts/detect_pointing.py | 175 --------------
common/vision/lasr_vision_pointing/setup.py | 10 -
skills/config/motions.yaml | 25 ++
skills/src/lasr_skills/__init__.py | 1 +
skills/src/lasr_skills/detect_gesture.py | 97 ++++++--
skills/src/lasr_skills/detect_pointing.py | 26 ---
skills/src/lasr_skills/find_gesture_person.py | 221 ++++++++++++++++++
.../src/carry_my_luggage/state_machine.py | 31 ++-
tasks/gpsr/config/lab.yaml | 8 +-
tasks/gpsr/src/gpsr/state_machine_factory.py | 26 ++-
15 files changed, 381 insertions(+), 554 deletions(-)
delete mode 100644 common/vision/lasr_vision_pointing/CMakeLists.txt
delete mode 100644 common/vision/lasr_vision_pointing/images/.gitkeep
delete mode 100644 common/vision/lasr_vision_pointing/package.xml
delete mode 100644 common/vision/lasr_vision_pointing/requirements.in
delete mode 100644 common/vision/lasr_vision_pointing/requirements.txt
delete mode 100644 common/vision/lasr_vision_pointing/scripts/detect_pointing.py
delete mode 100644 common/vision/lasr_vision_pointing/setup.py
delete mode 100644 skills/src/lasr_skills/detect_pointing.py
create mode 100644 skills/src/lasr_skills/find_gesture_person.py
diff --git a/common/vision/lasr_vision_pointing/CMakeLists.txt b/common/vision/lasr_vision_pointing/CMakeLists.txt
deleted file mode 100644
index 66f1cf831..000000000
--- a/common/vision/lasr_vision_pointing/CMakeLists.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-cmake_minimum_required(VERSION 3.0.2)
-project(lasr_vision_pointing)
-
-## Compile as C++11, supported in ROS Kinetic and newer
-# add_compile_options(-std=c++11)
-
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(catkin REQUIRED COMPONENTS
- geometry_msgs
- sensor_msgs
- roscpp
- rospy
- std_msgs
- catkin_virtualenv
-)
-
-## System dependencies are found with CMake's conventions
-# find_package(Boost REQUIRED COMPONENTS system)
-
-
-## Uncomment this if the package has a setup.py. This macro ensures
-## modules and global scripts declared therein get installed
-## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
-catkin_python_setup()
-catkin_generate_virtualenv(
- INPUT_REQUIREMENTS requirements.in
- PYTHON_INTERPRETER python3.10
-)
-
-################################################
-## Declare ROS messages, services and actions ##
-################################################
-
-## To declare and build messages, services or actions from within this
-## package, follow these steps:
-## * Let MSG_DEP_SET be the set of packages whose message types you use in
-## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
-## * In the file package.xml:
-## * add a build_depend tag for "message_generation"
-## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
-## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
-## but can be declared for certainty nonetheless:
-## * add a exec_depend tag for "message_runtime"
-## * In this file (CMakeLists.txt):
-## * add "message_generation" and every package in MSG_DEP_SET to
-## find_package(catkin REQUIRED COMPONENTS ...)
-## * add "message_runtime" and every package in MSG_DEP_SET to
-## catkin_package(CATKIN_DEPENDS ...)
-## * uncomment the add_*_files sections below as needed
-## and list every .msg/.srv/.action file to be processed
-## * uncomment the generate_messages entry below
-## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
-
-## Generate messages in the 'msg' folder
-# add_message_files(
-# FILES
-# Message1.msg
-# Message2.msg
-# )
-
-## Generate services in the 'srv' folder
-# add_service_files(
-# FILES
-# PointingDirection.srv
-# )
-
-## Generate actions in the 'action' folder
-# add_action_files(
-# FILES
-# Action1.action
-# Action2.action
-# )
-
-## Generate added messages and services with any dependencies listed here
-# generate_messages(
-# DEPENDENCIES
-# geometry_msgs
-# std_msgs
-# sensor_msgs
-# )
-
-################################################
-## Declare ROS dynamic reconfigure parameters ##
-################################################
-
-## To declare and build dynamic reconfigure parameters within this
-## package, follow these steps:
-## * In the file package.xml:
-## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
-## * In this file (CMakeLists.txt):
-## * add "dynamic_reconfigure" to
-## find_package(catkin REQUIRED COMPONENTS ...)
-## * uncomment the "generate_dynamic_reconfigure_options" section below
-## and list every .cfg file to be processed
-
-## Generate dynamic reconfigure parameters in the 'cfg' folder
-# generate_dynamic_reconfigure_options(
-# cfg/DynReconf1.cfg
-# cfg/DynReconf2.cfg
-# )
-
-###################################
-## catkin specific configuration ##
-###################################
-## The catkin_package macro generates cmake config files for your package
-## Declare things to be passed to dependent projects
-## INCLUDE_DIRS: uncomment this if your package contains header files
-## LIBRARIES: libraries you create in this project that dependent projects also need
-## CATKIN_DEPENDS: catkin_packages dependent projects also need
-## DEPENDS: system dependencies of this project that dependent projects also need
-catkin_package(
-# INCLUDE_DIRS include
-# LIBRARIES lasr_vision_pointing
- CATKIN_DEPENDS geometry_msgs roscpp rospy std_msgs sensor_msgs
-# DEPENDS system_lib
-)
-
-###########
-## Build ##
-###########
-
-## Specify additional locations of header files
-## Your package locations should be listed before other locations
-include_directories(
-# include
- ${catkin_INCLUDE_DIRS}
-)
-
-## Declare a C++ library
-# add_library(${PROJECT_NAME}
-# src/${PROJECT_NAME}/lasr_vision_pointing.cpp
-# )
-
-## Add cmake target dependencies of the library
-## as an example, code may need to be generated before libraries
-## either from message generation or dynamic reconfigure
-# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
-
-## Declare a C++ executable
-## With catkin_make all packages are built within a single CMake context
-## The recommended prefix ensures that target names across packages don't collide
-# add_executable(${PROJECT_NAME}_node src/lasr_vision_pointing_node.cpp)
-
-## Rename C++ executable without prefix
-## The above recommended prefix causes long target names, the following renames the
-## target back to the shorter version for ease of user use
-## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
-# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
-
-## Add cmake target dependencies of the executable
-## same as for the library above
-# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
-
-## Specify libraries to link a library or executable target against
-# target_link_libraries(${PROJECT_NAME}_node
-# ${catkin_LIBRARIES}
-# )
-
-#############
-## Install ##
-#############
-
-# all install targets should use catkin DESTINATION variables
-# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
-
-## Mark executable scripts (Python etc.) for installation
-## in contrast to setup.py, you can choose the destination
-catkin_install_python(PROGRAMS
- scripts/detect_pointing.py
- DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
-)
-
-## Mark executables for installation
-## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
-# install(TARGETS ${PROJECT_NAME}_node
-# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
-# )
-
-## Mark libraries for installation
-## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
-# install(TARGETS ${PROJECT_NAME}
-# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
-# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
-# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
-# )
-
-## Mark cpp header files for installation
-# install(DIRECTORY include/${PROJECT_NAME}/
-# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
-# FILES_MATCHING PATTERN "*.h"
-# PATTERN ".svn" EXCLUDE
-# )
-
-## Mark other files for installation (e.g. launch and bag files, etc.)
-install(FILES
- requirements.txt
- DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
-)
-
-#############
-## Testing ##
-#############
-
-## Add gtest based cpp test target and link libraries
-# catkin_add_gtest(${PROJECT_NAME}-test test/test_lasr_vision_pointing.cpp)
-# if(TARGET ${PROJECT_NAME}-test)
-# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
-# endif()
-
-## Add folders to be run by python nosetests
-# catkin_add_nosetests(test)
diff --git a/common/vision/lasr_vision_pointing/images/.gitkeep b/common/vision/lasr_vision_pointing/images/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/common/vision/lasr_vision_pointing/package.xml b/common/vision/lasr_vision_pointing/package.xml
deleted file mode 100644
index d99ef49f8..000000000
--- a/common/vision/lasr_vision_pointing/package.xml
+++ /dev/null
@@ -1,77 +0,0 @@
-
-
- lasr_vision_pointing
- 0.0.0
- The lasr_vision_pointing package
-
-
-
-
- Serge Alkhalil
-
-
-
-
-
- TODO
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- catkin
- geometry_msgs
- roscpp
- rospy
- std_msgs
- sensor_msgs
- geometry_msgs
- roscpp
- rospy
- std_msgs
- geometry_msgs
- roscpp
- rospy
- std_msgs
- sensor_msgs
- lasr_vision_msgs
-
-
- catkin_virtualenv
-
-
-
-
- requirements.txt
-
-
-
diff --git a/common/vision/lasr_vision_pointing/requirements.in b/common/vision/lasr_vision_pointing/requirements.in
deleted file mode 100644
index c1b708709..000000000
--- a/common/vision/lasr_vision_pointing/requirements.in
+++ /dev/null
@@ -1 +0,0 @@
-mediapipe==0.10.10
\ No newline at end of file
diff --git a/common/vision/lasr_vision_pointing/requirements.txt b/common/vision/lasr_vision_pointing/requirements.txt
deleted file mode 100644
index e6ef834ab..000000000
--- a/common/vision/lasr_vision_pointing/requirements.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-absl-py==2.1.0 # via mediapipe
-attrs==23.2.0 # via mediapipe
-cffi==1.16.0 # via sounddevice
-contourpy==1.2.1 # via matplotlib
-cycler==0.12.1 # via matplotlib
-flatbuffers==24.3.25 # via mediapipe
-fonttools==4.51.0 # via matplotlib
-jax==0.4.26 # via mediapipe
-kiwisolver==1.4.5 # via matplotlib
-matplotlib==3.8.4 # via mediapipe
-mediapipe==0.10.10 # via -r requirements.in
-ml-dtypes==0.4.0 # via jax
-numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy
-opencv-contrib-python==4.9.0.80 # via mediapipe
-opt-einsum==3.3.0 # via jax
-packaging==24.0 # via matplotlib
-pillow==10.3.0 # via matplotlib
-protobuf==3.20.3 # via mediapipe
-pycparser==2.22 # via cffi
-pyparsing==3.1.2 # via matplotlib
-python-dateutil==2.9.0.post0 # via matplotlib
-scipy==1.13.0 # via jax
-six==1.16.0 # via python-dateutil
-sounddevice==0.4.6 # via mediapipe
diff --git a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py b/common/vision/lasr_vision_pointing/scripts/detect_pointing.py
deleted file mode 100644
index b56e0d14e..000000000
--- a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import cv2
-import rospkg
-import rospy
-import mediapipe as mp
-from cv_bridge import CvBridge
-from lasr_vision_msgs.srv import (
- YoloDetectionRequest,
- YoloDetection,
- PointingDirection,
- PointingDirectionResponse,
- PointingDirectionRequest,
-)
-
-
-class PointingDetector:
- def __init__(self):
- self.detect_service = rospy.ServiceProxy("/yolov8/detect", YoloDetection)
- self.service = rospy.Service(
- "/pointing_detection_service", PointingDirection, self.excute
- )
- self.counter = 0
-
- # Load MediaPipe Pose model
- self.mp_pose = mp.solutions.pose
- self.pose = self.mp_pose.Pose()
-
- def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse:
- img = req.image_raw
- resp = PointingDirectionResponse()
- people_bounding_box = self.detection(img)
-
- if people_bounding_box:
- for person in people_bounding_box:
- keypoints = self.detect_keypoints(
- img
- ) # Detect keypoints using MediaPipe
- direction = self.determine_pointing_direction(
- keypoints, buffer_width=25
- )
- rospy.loginfo(f"Person detected pointing: {direction}")
-
- # Visualize pointing direction with landmarks
- image_path = os.path.join(
- rospkg.RosPack().get_path("lasr_vision_pointing"),
- "images",
- f"image{self.counter}.jpg",
- )
- self.visualize_pointing_direction_with_landmarks(
- image_path, person, direction, keypoints
- )
-
- resp.direction = direction
- else:
- resp.direction = "NONE"
-
- self.counter += 1
- return resp
-
- def store_image(self, img):
- package_path = rospkg.RosPack().get_path("lasr_vision_pointing")
- os.chdir(os.path.abspath(os.path.join(package_path, "images")))
- cv2.imwrite(f"image{self.counter}.jpg", img)
-
- def detect_keypoints(self, img):
- img_bridge = CvBridge().imgmsg_to_cv2(img, desired_encoding="passthrough")
- img_rgb = cv2.cvtColor(img_bridge, cv2.COLOR_BGR2RGB)
-
- # store the image
- self.store_image(img_rgb)
-
- results = self.pose.process(img_rgb)
-
- keypoints = []
- if results.pose_landmarks:
- for landmark in results.pose_landmarks.landmark:
- x = int(landmark.x * img.width)
- y = int(landmark.y * img.height)
- keypoints.append((x, y))
-
- return keypoints
-
- def detection(self, img):
- request = YoloDetectionRequest()
- request.image_raw = img # sensor_msgs/Image
- request.dataset = "yolov8n-seg.pt" # YOLOv8 model, auto-downloads
- request.confidence = 0.7 # minimum confidence to include in results
- request.nms = 0.4 # non maximal supression
-
- # send request
- response = self.detect_service(request)
-
- result = []
- for detection in response.detected_objects:
- if detection.name == "person":
- # cords of person in image
- result.append(detection.xywh)
-
- return result
-
- def determine_pointing_direction(self, keypoints, buffer_width=50):
- # Ensure keypoints are available
- if (
- len(keypoints) >= 7
- ): # Ensure we have at least 7 keypoints for the upper body
- # Extract relevant keypoints for shoulders and wrists
- left_shoulder = keypoints[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value]
- right_shoulder = keypoints[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
- left_wrist = keypoints[self.mp_pose.PoseLandmark.LEFT_WRIST.value]
- right_wrist = keypoints[self.mp_pose.PoseLandmark.RIGHT_WRIST.value]
-
- # Ensure all keypoints are detected
- if left_shoulder and right_shoulder and left_wrist and right_wrist:
- # Calculate the x-coordinate difference between shoulders and wrists
- left_diff = left_wrist[0] - left_shoulder[0]
- right_diff = right_shoulder[0] - right_wrist[0]
-
- # Determine pointing direction based on the difference in x-coordinates
- if abs(left_diff - right_diff) < buffer_width:
- return "FORWARDS"
- elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff):
- return "LEFT" if left_diff > 0 else "RIGHT"
- elif abs(right_diff) > buffer_width and abs(right_diff) > abs(
- left_diff
- ):
- return "RIGHT" if right_diff > 0 else "LEFT"
-
- # Default: Determine direction based on the relative position to the center of the image
- return "NONE"
-
- def visualize_pointing_direction_with_landmarks(
- self, image_path, person_bbox, pointing_direction, keypoints
- ):
- # Load image
- img = cv2.imread(image_path)
-
- # Extract person bbox coordinates
- x, y, w, h = person_bbox
- # Calculate center of bbox
- center_x = x + w // 2
- center_y = y + h // 2
-
- # Calculate endpoint of arrow based on pointing direction
- arrow_length = min(w, h) // 2
- if pointing_direction == "LEFT":
- endpoint = (center_x - arrow_length, center_y)
- elif pointing_direction == "RIGHT":
- endpoint = (center_x + arrow_length, center_y)
- elif pointing_direction == "FORWARDS":
- endpoint = (center_x, center_y)
- else:
- return # No pointing direction detected
-
- # Draw arrow on image
- color = (0, 255, 0) # Green color
- thickness = 2
- cv2.arrowedLine(img, (center_x, center_y), endpoint, color, thickness)
-
- # Draw landmarks (keypoints) on the image
- for keypoint in keypoints:
- if keypoint:
- cv2.circle(img, keypoint, 3, (0, 0, 255), -1) # Red color for landmarks
-
- # Store image with pointing direction visualization and landmarks
- output_image_path = f"landmark_image{self.counter}.jpg" # Change the output image path as needed
- cv2.imwrite(output_image_path, img)
-
-
-if __name__ == "__main__":
- rospy.init_node("pointing_detector")
- pointer = PointingDetector()
- rospy.loginfo("Pointing Detector is running")
- rospy.spin()
diff --git a/common/vision/lasr_vision_pointing/setup.py b/common/vision/lasr_vision_pointing/setup.py
deleted file mode 100644
index e094797e8..000000000
--- a/common/vision/lasr_vision_pointing/setup.py
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-
-from distutils.core import setup
-from catkin_pkg.python_setup import generate_distutils_setup
-
-setup_args = generate_distutils_setup(
- packages=["lasr_vision_pointing"], package_dir={"": "src"}
-)
-
-setup(**setup_args)
diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml
index 302eb6083..2b98cc74e 100644
--- a/skills/config/motions.yaml
+++ b/skills/config/motions.yaml
@@ -39,4 +39,29 @@ play_motion:
joints: [head_1_joint, head_2_joint]
points:
- positions: [0.0, 0.0]
+ time_from_start: 0.0
+ raise_torso:
+ joints: [torso_lift_joint]
+ points:
+ - positions: [0.5]
+ time_from_start: 0.0
+ pointing_to_the_right:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [2.70, -1.47, 0.38, 1.92, 1.66, 0.42, 0.08]
+ time_from_start: 0.0
+ pointing_to_the_left:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [0.32, -1.42, -0.39, 2.05, 1.62, 0.56, 0.09]
+ time_from_start: 0.0
+ raising_right_arm:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [0.12, 0.09, -3.16, 1.65, 1.53, 0.00, 0.13]
+ time_from_start: 0.0
+ raising_left_arm:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13]
time_from_start: 0.0
\ No newline at end of file
diff --git a/skills/src/lasr_skills/__init__.py b/skills/src/lasr_skills/__init__.py
index 736ee4823..4bceb19ca 100755
--- a/skills/src/lasr_skills/__init__.py
+++ b/skills/src/lasr_skills/__init__.py
@@ -21,3 +21,4 @@
from .recognise import Recognise
from .detect_pointing import DetectPointingDirection
from .detect_gesture import DetectGesture
+from .find_gesture_person import FindGesturePerson
diff --git a/skills/src/lasr_skills/detect_gesture.py b/skills/src/lasr_skills/detect_gesture.py
index 5ac565366..28d4f84cb 100755
--- a/skills/src/lasr_skills/detect_gesture.py
+++ b/skills/src/lasr_skills/detect_gesture.py
@@ -1,9 +1,14 @@
#!/usr/bin/env python3
+from typing import Optional
import smach
import rospy
+import cv2
+import cv2_img
from lasr_skills.vision import GetImage
+from lasr_skills import PlayMotion
from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest
from lasr_vision_msgs.msg import BodyPixMaskRequest
+from sensor_msgs.msg import Image
class DetectGesture(smach.State):
@@ -11,16 +16,25 @@ class DetectGesture(smach.State):
State for detecting gestures.
"""
- def __init__(self, gesture_to_detect: str = "raising_left_hand"):
- self.gesture_to_detect = gesture_to_detect
+ def __init__(
+ self,
+ gesture_to_detect: Optional[str] = None,
+ buffer_width: int = 50,
+ ):
+ """Optionally stores the gesture to detect. If None, it will infer the gesture from the keypoints."""
smach.State.__init__(
self,
outcomes=["succeeded", "failed"],
input_keys=["img_msg"],
output_keys=["gesture_detected"],
)
-
+ self.gesture_to_detect = gesture_to_detect
self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)
+ self.debug_publisher = rospy.Publisher(
+ "/gesture_detection/debug", Image, queue_size=1
+ )
+ self.buffer_width = buffer_width
def execute(self, userdata):
@@ -54,29 +68,67 @@ def execute(self, userdata):
"y": keypoint.xy[1],
"score": keypoint.score,
}
-
- if self.gesture_to_detect == "raising_left_hand":
+ if (
+ self.gesture_to_detect == "raising_left_arm"
+ or self.gesture_to_detect is None
+ ):
if part_info["leftWrist"]["y"] < part_info["leftShoulder"]["y"]:
- userdata.gesture_detected = True
- else:
- userdata.gesture_detected = False
- elif self.gesture_to_detect == "raising_right_hand":
+ self.gesture_to_detect = "raising_left_arm"
+ if (
+ self.gesture_to_detect == "raising_right_arm"
+ or self.gesture_to_detect is None
+ ):
if part_info["rightWrist"]["y"] < part_info["rightShoulder"]["y"]:
- userdata.gesture_detected = True
- else:
- userdata.gesture_detected = False
- else:
- raise ValueError("Invalid gesture to detect")
+ self.gesture_to_detect = "raising_right_arm"
+ if (
+ self.gesture_to_detect == "pointing_to_the_left"
+ or self.gesture_to_detect is None
+ ):
+ if (
+ part_info["leftWrist"]["x"] - self.buffer_width
+ > part_info["leftShoulder"]["x"]
+ ):
+ self.gesture_to_detect = "pointing_to_the_left"
+ if (
+ self.gesture_to_detect == "pointing_to_the_right"
+ or self.gesture_to_detect is None
+ ):
+ if (
+ part_info["rightShoulder"]["x"] - self.buffer_width
+ > part_info["rightWrist"]["x"]
+ ):
+ self.gesture_to_detect = "pointing_to_the_right"
+
+ if self.gesture_to_detect is None:
+ self.gesture_to_detect = "none"
+
+ userdata.gesture_detected = self.gesture_to_detect
+
+ cv2_gesture_img = cv2_img.msg_to_cv2_img(userdata.img_msg)
+ # Add text to the image
+ cv2.putText(
+ cv2_gesture_img,
+ self.gesture_to_detect,
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 1,
+ (0, 255, 0),
+ 2,
+ cv2.LINE_AA,
+ )
+ # Publish the image
+ self.debug_publisher.publish(cv2_img.cv2_img_to_msg(cv2_gesture_img))
return "succeeded"
+### Example usage:
class GestureDetectionSM(smach.StateMachine):
"""
State machine for detecting gestures.
"""
- def __init__(self, gesture_to_detect: str = "raising_left_hand"):
+ def __init__(self, gesture_to_detect: Optional[str] = None):
smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"])
self.gesture_to_detect = gesture_to_detect
with self:
@@ -95,9 +147,22 @@ def __init__(self, gesture_to_detect: str = "raising_left_hand"):
if __name__ == "__main__":
rospy.init_node("gesture_detection_sm")
+ ### Example usage:
while not rospy.is_shutdown():
sm = GestureDetectionSM()
sm.execute()
- print("Raising Left Hand Detected:", sm.userdata.gesture_detected)
+ gesture_state = PlayMotion(motion_name=sm.userdata.gesture_detected)
+ gesture_sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+ with gesture_sm:
+ smach.StateMachine.add(
+ "GESTURE_STATE",
+ gesture_state,
+ transitions={
+ "succeeded": "succeeded",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ gesture_sm.execute()
rospy.spin()
diff --git a/skills/src/lasr_skills/detect_pointing.py b/skills/src/lasr_skills/detect_pointing.py
deleted file mode 100644
index 21895dfc1..000000000
--- a/skills/src/lasr_skills/detect_pointing.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import rospy
-import smach
-from sensor_msgs.msg import Image
-
-from lasr_vision_msgs.srv import PointingDirection
-
-
-class DetectPointingDirection(smach.State):
- def __init__(self, image_topic: str = "/xtion/rgb/image_raw"):
- smach.State.__init__(
- self, outcomes=["succeeded", "failed"], output_keys=["pointing_direction"]
- )
- self._image_topic = image_topic
- self._pointing_service = rospy.ServiceProxy(
- "/pointing_detection_service", PointingDirection
- )
- self._pointing_service.wait_for_service()
-
- def execute(self, userdata):
-
- img_msg = rospy.wait_for_message(self._image_topic, Image)
- resp = self._pointing_service(img_msg)
- if resp.direction == "NONE" or resp.direction == "FORWARDS":
- return "failed"
- userdata.pointing_direction = resp.direction
- return "succeeded"
diff --git a/skills/src/lasr_skills/find_gesture_person.py b/skills/src/lasr_skills/find_gesture_person.py
new file mode 100644
index 000000000..ae195494a
--- /dev/null
+++ b/skills/src/lasr_skills/find_gesture_person.py
@@ -0,0 +1,221 @@
+import smach
+import rospy
+
+from lasr_skills import Detect3D, GoToLocation, DetectGesture, PlayMotion
+from lasr_skills.vision import GetImage
+import navigation_helpers
+
+from geometry_msgs.msg import Pose, PoseWithCovarianceStamped, Point, Quaternion
+
+
+from typing import List, Union
+
+
+class FindGesturePerson(smach.StateMachine):
+ class GetLocation(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["location_index", "waypoints"],
+ output_keys=["location"],
+ )
+
+ def execute(self, userdata):
+ userdata.location = userdata.waypoints[userdata.location_index]
+ return "succeeded"
+
+ class GetPose(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ output_keys=["current_pose"],
+ )
+
+ def execute(self, userdata):
+ userdata.current_pose = rospy.wait_for_message(
+ "/amcl_pose", PoseWithCovarianceStamped
+ ).pose.pose
+ return "succeeded"
+
+ class ComputePath(smach.State):
+ def __init__(self, waypoints: List[Pose]):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["current_pose"],
+ output_keys=["waypoints"],
+ )
+ self._waypoints = waypoints
+
+ def execute(self, userdata):
+ userdata.waypoints = navigation_helpers.min_hamiltonian_path(
+ userdata.current_pose, self._waypoints
+ )
+ return "succeeded"
+
+ class HandleDetections(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["detections_3d"],
+ output_keys=["person_point"],
+ )
+
+ def execute(self, userdata):
+ if len(userdata.detections_3d.detected_objects) == 0:
+ rospy.logwarn("No person detected, returning failed.")
+ return "failed"
+ userdata.person_point = userdata.detections_3d.detected_objects[0].point
+ return "succeeded"
+
+ class HandleResponse(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["gesture_detected"],
+ )
+
+ def execute(self, userdata):
+ if userdata.gesture_detected:
+ return "succeeded"
+ return "failed"
+
+ def __init__(
+ self,
+ gesture: str,
+ waypoints: Union[List[Pose], None] = None,
+ location_param: Union[str, None] = None,
+ ):
+ smach.StateMachine.__init__(
+ self, outcomes=["succeeded", "failed"], output_keys=["person_point"]
+ )
+ if waypoints is None and location_param is None:
+ raise ValueError("Either waypoints or location_param must be provided")
+
+ if gesture == "person raising their left arm":
+ self.gesture = "raising_left_arm"
+ elif gesture == "person raising their right arm":
+ self.gesture = "raising_right_arm"
+ elif gesture == "person pointing to the right":
+            self.gesture = "pointing_to_the_right"
+        elif gesture == "person pointing to the left":
+            self.gesture = "pointing_to_the_left"
+ else:
+ raise ValueError(f"Gesture {gesture} not recognized")
+
+ if waypoints is None:
+ waypoints_to_iterate: List[Pose] = []
+
+ room = rospy.get_param(location_param)
+ beacons = room["beacons"]
+ for beacon in beacons:
+ waypoint: Pose = Pose(
+ position=Point(**beacons[beacon]["near_pose"]["position"]),
+ orientation=Quaternion(
+ **beacons[beacon]["near_pose"]["orientation"]
+ ),
+ )
+ waypoints_to_iterate.append(waypoint)
+ else:
+ waypoints_to_iterate: List[Pose] = waypoints
+
+ with self:
+ smach.StateMachine.add(
+ "GET_POSE",
+ self.GetPose(),
+ transitions={"succeeded": "COMPUTE_PATH", "failed": "failed"},
+ )
+
+ smach.StateMachine.add(
+ "COMPUTE_PATH",
+ self.ComputePath(waypoints_to_iterate),
+ transitions={"succeeded": "WAYPOINT_ITERATOR", "failed": "failed"},
+ )
+
+ waypoint_iterator = smach.Iterator(
+ outcomes=["succeeded", "failed"],
+ it=lambda: range(len(waypoints_to_iterate)),
+ it_label="location_index",
+ input_keys=["waypoints"],
+ output_keys=["person_point"],
+ exhausted_outcome="failed",
+ )
+
+ with waypoint_iterator:
+ container_sm = smach.StateMachine(
+ outcomes=["succeeded", "failed", "continue"],
+ input_keys=["location_index", "waypoints"],
+ output_keys=["person_point"],
+ )
+
+ with container_sm:
+ smach.StateMachine.add(
+ "GET_LOCATION",
+ self.GetLocation(),
+ transitions={"succeeded": "GO_TO_LOCATION", "failed": "failed"},
+ )
+
+ smach.StateMachine.add(
+ "GO_TO_LOCATION",
+ GoToLocation(),
+ transitions={
+ "succeeded": "DETECT3D",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "DETECT3D",
+ Detect3D(filter=["person"]),
+ transitions={
+ "succeeded": "HANDLE_DETECTIONS",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_DETECTIONS",
+ self.HandleDetections(),
+ transitions={
+ "succeeded": "PLAY_MOTION",
+ "failed": "continue",
+ },
+                )
+ smach.StateMachine.add(
+ "PLAY_MOTION",
+ PlayMotion(motion_name="raise_torso"),
+ transitions={
+ "succeeded": "GET_IMAGE",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+                )
+ smach.StateMachine.add(
+ "GET_IMAGE",
+ GetImage(),
+ transitions={"succeeded": "CHECK_GESTURE", "failed": "failed"},
+ )
+ smach.StateMachine.add(
+ "CHECK_GESTURE",
+ DetectGesture(self.gesture),
+ transitions={
+ "succeeded": "HANDLE_RESPONSE",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_RESPONSE",
+ self.HandleResponse(),
+ transitions={"succeeded": "succeeded", "failed": "continue"},
+ )
+
+ waypoint_iterator.set_contained_state(
+ "CONTAINER_STATE", container_sm, loop_outcomes=["continue"]
+ )
+ smach.StateMachine.add(
+ "WAYPOINT_ITERATOR",
+ waypoint_iterator,
+ {"succeeded": "succeeded", "failed": "failed"},
+ )
diff --git a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
index 39cd67ce5..00c743c20 100644
--- a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
+++ b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
@@ -4,7 +4,7 @@
WaitForPerson,
DetectPointingDirection,
Say,
- PlayMotion,
+ DetectGesture,
ReceiveObject,
)
@@ -13,6 +13,24 @@
class CarryMyLuggage(smach.StateMachine):
+ class ProcessPointingDirection(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["gesture_detected"],
+ output_keys=["pointing_direction"],
+ )
+
+ def execute(self, userdata):
+ if userdata.gesture_detected == "pointing_to_the_left":
+ userdata.pointing_direction = "left"
+ elif userdata.gesture_detected == "pointing_to_the_right":
+ userdata.pointing_direction = "right"
+ else:
+ return "failed"
+ return "succeeded"
+
def __init__(self):
smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"])
@@ -28,7 +46,16 @@ def __init__(self):
smach.StateMachine.add(
"DETECT_POINTING_DIRECTION",
- DetectPointingDirection(),
+ DetectGesture(),
+ transitions={
+ "succeeded": "PROCESS_POINTING_DIRECTION",
+ "failed": "SAY_FAILED_POINTING",
+ },
+ )
+
+ smach.StateMachine.add(
+ "PROCESS_POINTING_DIRECTION",
+ CarryMyLuggage.ProcessPointingDirection(),
transitions={
"succeeded": "SAY_BAG",
"failed": "SAY_FAILED_POINTING",
diff --git a/tasks/gpsr/config/lab.yaml b/tasks/gpsr/config/lab.yaml
index 96cd8cd52..f62e2c4c8 100644
--- a/tasks/gpsr/config/lab.yaml
+++ b/tasks/gpsr/config/lab.yaml
@@ -9,8 +9,8 @@ arena:
beacons:
tv_desk:
near_pose:
- position: {x: 0.96, y: 0.82, z: 0.0}
- orientation: {x: 0.0, y: 0.0, z: 0.76, w: 0.65}
+ position: {x: 0.989603867147269, y: 0.47950265241289325, z: 0.0}
+ orientation: {x: 0.0, y: 0.0, z: 0.738945375913055, w: 0.6737653385391044}
near_polygon: [[-0.11, 0.00], [-0.08, 2.36], [1.84, 2.75], [2.02, 1.31]]
search_pose:
position: {x: 0.97, y: 1.60, z: 0.0}
@@ -18,8 +18,8 @@ arena:
search_polygon: [[0.40, 1.92], [0.31, 2.45], [1.36, 2.70], [1.456, 2.17]]
window_desk:
near_pose:
- position: {x: 2.38, y: -3.0, z: 0.0}
- orientation: {x: 0.0, y: 0.0, z: -0.62, w: 0.79}
+ position: {x: 2.6771759491107954, y: -2.2998999761206105, z: 0.0}
+ orientation: {x: 0.0, y: 0.0, z: -0.6148810103509047, w: 0.7886198977389871}
near_polygon: [[3.34, -3.27], [3.75, -4.80], [2.47, -5.20], [1.75, -3.58]]
search_pose:
position: {x: 2.47, y: -3.73, z: 0.0}
diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py
index 0208686dd..2e005919b 100644
--- a/tasks/gpsr/src/gpsr/state_machine_factory.py
+++ b/tasks/gpsr/src/gpsr/state_machine_factory.py
@@ -2,7 +2,7 @@
import rospy
import smach
from typing import Dict, List
-from lasr_skills import GoToLocation, FindNamedPerson # type: ignore
+from lasr_skills import GoToLocation, FindNamedPerson, FindGesturePerson # type: ignore
from gpsr.states import Talk
@@ -43,11 +43,25 @@ def _handle_find_command(self, command_param: Dict):
def _handle_talk_command(self, command_param: Dict):
if "gesture" in command_param:
- raise NotImplementedError("Talk command with gesture not implemented")
- elif "text" in command_param:
+ if not "location" in command_param:
+ raise ValueError(
+ "Talk command with gesture but no room in command parameters"
+ )
+ location_param_room = f"/gpsr/arena/rooms/{command_param['location']}"
+ self.sm.add(
+ f"STATE_{self._increment_state_count()}",
+ FindGesturePerson(
+ location_param=location_param_room, gesture=command_param["gesture"]
+ ),
+ transitions={
+ "succeeded": f"STATE_{self.state_count + 1}",
+ "failed": "failed",
+ },
+ )
+ if "talk" in command_param:
self.sm.add(
f"STATE_{self._increment_state_count()}",
- Talk(text=command_param["text"]),
+ Talk(talk_phrase=command_param["talk"]),
transitions={
"succeeded": f"STATE_{self.state_count + 1}",
"failed": "failed",
@@ -143,8 +157,8 @@ def build_state_machine(self) -> smach.StateMachine:
raise ValueError(f"Invalid command verb: {command_verb}")
self.sm.add(
- f"STATE_{self.state_count}",
- Talk(text="I have completed the task."),
+ f"STATE_{self.state_count+1}",
+ Talk(talk_phrase="I have completed the task."),
transitions={"succeeded": "succeeded", "failed": "failed"},
)
From d7c3ca98609481dd80b5cb3bf8193831b6728372 Mon Sep 17 00:00:00 2001
From: fireblonde <71558254+fireblonde@users.noreply.github.com>
Date: Fri, 24 May 2024 12:30:47 +0100
Subject: [PATCH 2/3] feat: look at person (#166)
* refactor: use correct naming conventions.
* feat/refactor: introduce generic object detection skills, modularise from this.
* refactor: let the generic perception skills acquire their own sensor input (image/pcl).
* refactor/chore: imports and add LookForSeats stub.
* feat: always publish markers for debugging, when doing 3D point estimation.
* WIP: look for place to seat people.
* chore: default sample rate.
* chore: setup receptionist demo points.
* fix: sleep.
* feat: LookToPoint skill.
* chore: reorganise receptionist Rasa data and pipeline.
* feat: additional motions.
* chore: update receptionist demo.
* feat: look for chair and seat guest.
* WIP: get guest name.
* feat: be more informative.
* fix: set unknown.
* feat: announce name as well as drink.
* refactor: ignore point.
* feat: be more explicit; look at the chair and semantically describe its location.
* WIP.
* feat: add option to specify device to use for whisper.
* feat: simple transcribe microphone service.
* refactor: make transcribe and parse service use the new 'simple' transcribe audio service.
* feat: terminate audio interface.
* chore: update CMakeLists.
* refactor: acquire microphone when initialising service.
* chore: update launch file.
* fix: remove call to redundant method.
* fix: error handling.
* feat/fix: pre-load whisper model by prompting it, adjust for ambient noise at the beginning.
* chore: configuration required for deploying docs
* docs: update container documentation
* ci: first draft of documentation build through actions
* ci: remove submodules
* ci: use ubuntu 20.04
* ci: try using ros:noetic container image
* ci: catkin tools isn't available in container
* ci: remove extraneous source line
* ci: try the one-liner ROS installer
* ci: use the official ROS noetic installation guide
* ci: install catkin tools
* ci: try sourcing before commands
* ci: deploy to github pages (no stage check yet)
* ci: use deploy key
* ci: only deploy if on main
* chore: update receptionist data and pipeline.
* fix: explicitly set spacy NLP model.
* fix: drop age for now, since spacy is broken(?).
* feat: point and raise_torso motions.
* feat: point to chair.
* feat: additional motions.
* fix: reset motions.
* fix: handle empty transcriptions.
* fix: enable start state.
* fix: properly reset motions
* fix(ci): use github_token for gh pages deploy.
* revert: fix(ci): use github_token for gh pages deploy.
* feat: save microphone output to audio files
* chore: update gitignore with python setup files
* fix: filename typo and add to cmake
* testtesttest
* Test
* Test
* Message Change
* fixed message writing and reading
* fixed image crop
* Updated the feature extraction node and changed the messages.
* fixed that the torso and head frames were inverted.
* Changed colour format from BGR to RGB within the detection process.
* keep the saved model
* keep the model file
* keep the saved model
* Cancel saving the images (but still cannot use cv2.imshow)
* Runnable demo
* added the hair colour distribution matching method
* retrained model is very robust so changed the threshold
* Moving the head to meet the person.
* xyz axis readable.
* (Hopefully) Runnable with 3d input.
* Speak normally.
* Try to move the head.
* ah
* At least the head moves, not looking at me though.
* Cleaned the file to have only one model appear.
* Replace the old model with the new one.
* correct the lost module.
* info update
* fixed issues in the service
* fixed a stupid typo
* runnable version for full demo
* Recover the state machine for demo.
* Added a simple loop to refresh the frame taken, should work fine.
* adding learn_face
* adding detect faces
* Add detect faces state
* editing detect phases
* Implement state for learning faces
* adding in extra states
* any changes
* adjustments when testing
* push lol
* finished merging?
* new state machine
* merging from rexy laptop
* fixed detect faces
* getting rid of lasr_speech
* swapping from base controllers to skills
* fixing state machine after merge with main
* making changes
* TEST: .idea diff
* feat: get name and drink skill & prior data yaml
* fix: missing keys for get name & drink constructor
* feat: stubs for get attribute state
* feat: wrap head manager and lowering base into GoToLocation.
* feat: state machine setup.
* test: Github tracking fixes
* feat: allow AskAndListen to take static text, a format string, or read from userdata.
* refactor: being picky about spaces.
* fix: import states in dunder init.
* feat: get name and drink, and stub for attributes.
* fix: typehint for userdata
* feat: setup host.
* fix: typehint.
* fix: refactor ParseNameAndDrink state to convert possible names and drinks to lowercase
* refactor: receptionist -> sm_merge (conflicts with python package...)
* refactor: remove redundant __init__.py
* fix: container consistency.
* fix: initialise data correctly.
* fix: lower.
* fix: uncomment.
* feat: take seating area pose and polygon as input to sm.
* feat: guide guest to seating area.
* feat: add introduce state
* feat: add introduction to state machine
* feat(WIP): find empty seat.
* fix: sneaky parenthesis stops boolifying.
* fix: correctly access guest attribute data
* fix: get introduce working.
* feat(WIP, still): find an empty seat.
* feat/refactor: SeatGuest is working and rename.
* feat: update init.py
* refactor: static motions.
* feat: seat guest in state machine.
* feat: send head back to default.
* refactor: center -> centre.
* feat: logic for second guest and pass guest id through constructors.
* fix: transition.
* refactor: drop unused method.
* Remove print statement and add TODO.
* fix: namespace.
* fix: transition and assume PlayMotion succeeded.
* test: yaml setup for testing.
* fix: transitions.
* refactor: remove redundant comment.
* fix: dumb intersection bug.
* feat: clip skill for VQA
* feat: attempt of clip for attributes
* fix: missing imports
* feat: tidied clip attributes
* refactor: cleanup.
* refactor: cleanup again.
* refactor: massive cleanup.
* refactor: remove hair colour images.
* refactor: cleanup cv2_img.
* refactor: move Vqa service message into lasr_vision_msgs.
* fix: import.
* cleanup: remove vision changes.
* refactor: cleanup clip vqa skill.
* refactor: cleanup describe people skill.
* refactor: cleanup vision msgs.
* refactor: remove shebang and unused import.
* refactor: remove unnecessary message.
* refactor: make QueryImage a ServiceState.
* refactor: make QueryImage a ServiceState.
* build: CMake and requirements.
* feat: fix the msgs for bodypix
* feat: test initial segment person
* feat: filter the poses like the masks
* chore: add pcl to img msg and keep the same timestamp
* chore: change the look-to-point skill to use a PointStamped instead (see the sketch after this list)
* chore: add the 3d image skill
* feat: working look at person
* chore: cleanup for PR
* fix: the PR comments + add head manager disable/enable
* feat: wip
* feat: working wip
* feat: look at multiple people working
* chore: prep for PR
* chore: reformat agaaaain
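
An illustrative sketch of the new cv2_pcl helper and the updated LookToPoint
interface (not part of the diff; it assumes a robot publishing
/xtion/depth_registered/points and the cv2_pcl and lasr_skills packages built;
the node name and the example point are made up):

    import rospy
    import smach
    from sensor_msgs.msg import PointCloud2
    from geometry_msgs.msg import PointStamped, Point
    from std_msgs.msg import Header
    from cv2_pcl import pcl_to_img_msg
    from lasr_skills import LookToPoint

    rospy.init_node("look_to_point_example")

    # Convert a single point cloud into an RGB image message;
    # the helper keeps the original point cloud timestamp.
    pcl = rospy.wait_for_message("/xtion/depth_registered/points", PointCloud2)
    img_msg = pcl_to_img_msg(pcl)
    rospy.loginfo(
        "image stamp matches cloud stamp: %s",
        img_msg.header.stamp == pcl.header.stamp,
    )

    # LookToPoint now reads a full PointStamped from the "pointstamped"
    # userdata key instead of a bare point assumed to be in the map frame.
    sm = smach.StateMachine(outcomes=["succeeded", "aborted", "preempted"])
    sm.userdata.pointstamped = PointStamped(
        header=Header(frame_id="map"), point=Point(1.0, 0.0, 1.2)
    )
    with sm:
        smach.StateMachine.add(
            "LOOK",
            LookToPoint(),
            transitions={
                "succeeded": "succeeded",
                "aborted": "aborted",
                "preempted": "preempted",
            },
        )
    sm.execute()
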
---------
Co-authored-by: Jared Swift
Co-authored-by: Paul Makles
Co-authored-by: m-barker
Co-authored-by: Benteng Ma
Co-authored-by: MBT <59250708+MBTMBTMBT@users.noreply.github.com>
Co-authored-by: tiago
Co-authored-by: Zoe
Co-authored-by: Aaliyah Merchant
Co-authored-by: Haiwei Luo
---
.../helpers/cv2_pcl/src/cv2_pcl/__init__.py | 11 +
.../src/lasr_vision_bodypix/bodypix.py | 1 -
.../lasr_vision_msgs/msg/BodyPixPose.msg | 1 -
skills/src/lasr_skills/detect_faces.py | 19 +-
skills/src/lasr_skills/look_at_person.py | 282 ++++++++++++++++++
skills/src/lasr_skills/look_to_point.py | 7 +-
tasks/receptionist/launch/setup.launch | 1 -
7 files changed, 309 insertions(+), 13 deletions(-)
create mode 100755 skills/src/lasr_skills/look_at_person.py
diff --git a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
index 05f3ef901..dfc27415c 100644
--- a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
+++ b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
@@ -2,12 +2,23 @@
from sensor_msgs.msg import PointCloud2
import ros_numpy as rnp
import cv2
+from cv2_img import cv2_img_to_msg
from typing import Tuple, Union
Mat = np.ndarray
+def pcl_to_img_msg(pcl: PointCloud2) -> Mat:
+ """
+ Convert a given PointCloud2 message to img_msg
+ """
+ # keep the same timestamp
+ cv2 = pcl_to_cv2(pcl)
+
+ return cv2_img_to_msg(cv2, pcl.header.stamp)
+
+
def pcl_to_cv2(
pcl: PointCloud2, height: Union[int, None] = None, width: Union[int, None] = None
) -> Mat:
diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
index dcd811b89..b6afa0f6a 100644
--- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
+++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
@@ -114,7 +114,6 @@ def detect(
keypoints_msg = []
for i, keypoint in pose.keypoints.items():
-
if camel_to_snake(keypoint.part) in request.masks[0].parts:
keypoint_msg = BodyPixKeypoint()
keypoint_msg.xy = [int(keypoint.position.x), int(keypoint.position.y)]
diff --git a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
index 1ba8b9f7d..0d416eeae 100644
--- a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
+++ b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
@@ -1,2 +1 @@
-# keypoints
BodyPixKeypoint[] keypoints
\ No newline at end of file
diff --git a/skills/src/lasr_skills/detect_faces.py b/skills/src/lasr_skills/detect_faces.py
index c0605b3f8..55af6204a 100644
--- a/skills/src/lasr_skills/detect_faces.py
+++ b/skills/src/lasr_skills/detect_faces.py
@@ -3,21 +3,30 @@
from lasr_vision_msgs.srv import DetectFaces as DetectFacesSrv
from sensor_msgs.msg import Image
+from cv2_pcl import pcl_to_img_msg
class DetectFaces(smach.State):
-
- def __init__(self, image_topic: str = "/xtion/rgb/image_raw"):
+ def __init__(
+ self,
+ image_topic: str = "/xtion/rgb/image_raw",
+ ):
smach.State.__init__(
- self, outcomes=["succeeded", "failed"], output_keys=["detections"]
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["pcl_msg"],
+ output_keys=["detections"],
)
+ self.img_msg = None
self._image_topic = image_topic
self._detect_faces = rospy.ServiceProxy("/detect_faces", DetectFacesSrv)
def execute(self, userdata):
- img_msg = rospy.wait_for_message(self._image_topic, Image)
+ self.img_msg = pcl_to_img_msg(userdata.pcl_msg)
+ if not self.img_msg:
+ self.img_msg = rospy.wait_for_message(self._image_topic, Image)
try:
- result = self._detect_faces(img_msg)
+ result = self._detect_faces(self.img_msg)
userdata.detections = result
return "succeeded"
except rospy.ServiceException as e:
diff --git a/skills/src/lasr_skills/look_at_person.py b/skills/src/lasr_skills/look_at_person.py
new file mode 100755
index 000000000..1fc477518
--- /dev/null
+++ b/skills/src/lasr_skills/look_at_person.py
@@ -0,0 +1,282 @@
+import smach_ros
+from geometry_msgs.msg import PointStamped
+import smach
+from vision import GetPointCloud
+from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest
+from lasr_vision_msgs.msg import BodyPixMaskRequest
+from lasr_skills import LookToPoint, DetectFaces
+import cv2
+import rospy
+from sensor_msgs.msg import Image
+from cv2_img import cv2_img_to_msg, msg_to_cv2_img
+from markers import create_and_publish_marker
+from visualization_msgs.msg import Marker
+from cv2_pcl import pcl_to_img_msg
+import ros_numpy as rnp
+import rosservice
+from smach import CBState
+from std_msgs.msg import String
+import actionlib
+from control_msgs.msg import PointHeadAction, PointHeadGoal
+from geometry_msgs.msg import Point
+
+PUBLIC_CONTAINER = False
+
+try:
+ from pal_startup_msgs.srv import (
+ StartupStart,
+ StartupStop,
+ StartupStartRequest,
+ StartupStopRequest,
+ )
+except ModuleNotFoundError:
+ PUBLIC_CONTAINER = True
+
+
+class LookAtPerson(smach.StateMachine):
+ class CheckEyes(smach.State):
+ def __init__(self, debug=True):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed", "no_detection"],
+ input_keys=["bbox_eyes", "pcl_msg", "masks", "detections"],
+ output_keys=["pointstamped"],
+ )
+ self.DEBUG = debug
+ self.img_pub = rospy.Publisher("/debug/image", Image, queue_size=1)
+ self.marker_pub = rospy.Publisher("eyes", Marker, queue_size=1)
+ self.look_at_pub = actionlib.SimpleActionClient(
+ "/head_controller/point_head_action", PointHeadAction
+ )
+
+ def execute(self, userdata):
+ rospy.sleep(1)
+ if len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) > 0:
+ return "succeeded"
+ elif (
+ len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) < 1
+ ):
+ return "no_detection"
+
+ for det in userdata.bbox_eyes:
+ left_eye = det["left_eye"]
+ right_eye = det["right_eye"]
+ eye_point = (left_eye[0] + right_eye[0]) / 2, (
+ left_eye[1] + right_eye[1]
+ ) / 2
+
+ if self.DEBUG:
+ img = msg_to_cv2_img(pcl_to_img_msg(userdata.pcl_msg))
+ cv2.circle(img, (left_eye[0], left_eye[1]), 5, (0, 255, 0), -1)
+ cv2.circle(img, (right_eye[0], right_eye[1]), 5, (0, 255, 0), -1)
+ img_msg1 = cv2_img_to_msg(img)
+ self.img_pub.publish(img_msg1)
+
+ pcl_xyz = rnp.point_cloud2.pointcloud2_to_xyz_array(
+ userdata.pcl_msg, remove_nans=False
+ )
+ eye_point_pcl = pcl_xyz[int(eye_point[1]), int(eye_point[0])]
+ if any([True for i in eye_point_pcl if i != i]):
+ eye_point_pcl = pcl_xyz[int(right_eye[1]), int(right_eye[0])]
+
+ look_at = PointStamped()
+ look_at.header = userdata.pcl_msg.header
+ look_at.point.x = eye_point_pcl[0]
+ look_at.point.y = eye_point_pcl[1]
+ look_at.point.z = eye_point_pcl[2]
+
+ if self.DEBUG:
+ create_and_publish_marker(self.marker_pub, look_at, r=0, g=1, b=0)
+
+ userdata.bbox_eyes.remove(det)
+ userdata.pointstamped = look_at
+
+ self.look_at_pub.wait_for_server()
+ goal = PointHeadGoal()
+ goal.pointing_frame = "head_2_link"
+ goal.pointing_axis = Point(1.0, 0.0, 0.0)
+ goal.max_velocity = 1.0
+ goal.target = look_at
+ self.look_at_pub.send_goal(goal)
+
+ return "succeeded"
+
+ return "failed"
+
+ @smach.cb_interface(input_keys=["poses", "detections"], output_keys=["bbox_eyes"])
+ def match_poses_and_detections(ud):
+ bbox_eyes = []
+ for pose in ud.poses:
+ for detection in ud.detections.detections:
+ temp = {
+ "bbox": detection.xywh,
+ }
+ for keypoint in pose.keypoints:
+ if (
+ keypoint.part == "leftEye"
+ and detection.xywh[0]
+ < keypoint.xy[0]
+ < detection.xywh[0] + detection.xywh[2]
+ and detection.xywh[1]
+ < keypoint.xy[1]
+ < detection.xywh[1] + detection.xywh[3]
+ ):
+ temp["left_eye"] = keypoint.xy
+ if (
+ keypoint.part == "rightEye"
+ and detection.xywh[0]
+ < keypoint.xy[0]
+ < detection.xywh[0] + detection.xywh[2]
+ and detection.xywh[1]
+ < keypoint.xy[1]
+ < detection.xywh[1] + detection.xywh[3]
+ ):
+ temp["right_eye"] = keypoint.xy
+
+ if "left_eye" in temp and "right_eye" in temp:
+ bbox_eyes.append(temp)
+
+ ud.bbox_eyes = bbox_eyes
+
+ return "succeeded"
+
+ def __init__(self):
+ super(LookAtPerson, self).__init__(
+ outcomes=["succeeded", "failed"],
+ input_keys=[],
+ output_keys=["masks", "poses", "pointstamped"],
+ )
+ self.DEBUG = rospy.get_param("/debug", True)
+ IS_SIMULATION = (
+ "/pal_startup_control/start" not in rosservice.get_service_list()
+ )
+
+ with self:
+ if not IS_SIMULATION:
+ if PUBLIC_CONTAINER:
+ rospy.logwarn(
+ "You are using a public container. The head manager will not be stopped during navigation."
+ )
+ else:
+ smach.StateMachine.add(
+ "DISABLE_HEAD_MANAGER",
+ smach_ros.ServiceState(
+ "/pal_startup_control/stop",
+ StartupStop,
+ request=StartupStopRequest("head_manager"),
+ ),
+ transitions={
+ "succeeded": "GET_IMAGE",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "GET_IMAGE",
+ GetPointCloud("/xtion/depth_registered/points"),
+ transitions={
+ "succeeded": "SEGMENT_PERSON",
+ },
+ remapping={"pcl_msg": "pcl_msg"},
+ )
+
+ eyes = BodyPixMaskRequest()
+ eyes.parts = ["left_eye", "right_eye"]
+ masks = [eyes]
+
+ smach.StateMachine.add(
+ "SEGMENT_PERSON",
+ smach_ros.ServiceState(
+ "/bodypix/detect",
+ BodyPixDetection,
+ request_cb=lambda ud, _: BodyPixDetectionRequest(
+ pcl_to_img_msg(ud.pcl_msg), "resnet50", 0.7, masks
+ ),
+ response_slots=["masks", "poses"],
+ input_keys=["pcl_msg"],
+ output_keys=["masks", "poses"],
+ ),
+ transitions={
+ "succeeded": "DETECT_FACES",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "DETECT_FACES",
+ DetectFaces(),
+ transitions={
+ "succeeded": "MATCH_POSES_AND_DETECTIONS",
+ "failed": "failed",
+ },
+ remapping={"pcl_msg": "pcl_msg", "detections": "detections"},
+ )
+
+ smach.StateMachine.add(
+ "MATCH_POSES_AND_DETECTIONS",
+ CBState(
+ self.match_poses_and_detections,
+ input_keys=["poses", "detections"],
+ output_keys=["poses"],
+ outcomes=["succeeded", "failed"],
+ ),
+ transitions={"succeeded": "CHECK_EYES", "failed": "failed"},
+ remapping={"bbox_eyes": "bbox_eyes"},
+ )
+ smach.StateMachine.add(
+ "CHECK_EYES",
+ self.CheckEyes(self.DEBUG),
+ transitions={
+ "succeeded": "LOOK_TO_POINT",
+ "failed": "failed",
+ "no_detection": "succeeded",
+ },
+ remapping={
+ "pcl_msg": "pcl_msg",
+ "bbox_eyes": "bbox_eyes",
+ "pointstamped": "pointstamped",
+ },
+ )
+
+ smach.StateMachine.add(
+ "LOOK_TO_POINT",
+ LookToPoint(),
+ transitions={
+ "succeeded": "LOOP",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ remapping={"pointstamped": "pointstamped"},
+ )
+ smach.StateMachine.add(
+ "LOOP",
+ smach.CBState(
+ lambda ud: "succeeded" if len(ud.bbox_eyes) > 0 else "finish",
+ input_keys=["bbox_eyes"],
+ output_keys=["bbox_eyes"],
+ outcomes=["succeeded", "finish"],
+ ),
+ transitions={
+ "succeeded": "CHECK_EYES",
+ "finish": "ENABLE_HEAD_MANAGER",
+ },
+ )
+ if not IS_SIMULATION:
+ if PUBLIC_CONTAINER:
+ rospy.logwarn(
+ "You are using a public container. The head manager will not be start following navigation."
+ )
+ else:
+ smach.StateMachine.add(
+ "ENABLE_HEAD_MANAGER",
+ smach_ros.ServiceState(
+ "/pal_startup_control/start",
+ StartupStart,
+ request=StartupStartRequest("head_manager", ""),
+ ),
+ transitions={
+ "succeeded": "succeeded",
+ "preempted": "failed",
+ "aborted": "failed",
+ },
+ )
diff --git a/skills/src/lasr_skills/look_to_point.py b/skills/src/lasr_skills/look_to_point.py
index 81c633ad7..92fe2e3e1 100755
--- a/skills/src/lasr_skills/look_to_point.py
+++ b/skills/src/lasr_skills/look_to_point.py
@@ -13,10 +13,7 @@ def __init__(self):
pointing_frame="head_2_link",
pointing_axis=Point(1.0, 0.0, 0.0),
max_velocity=1.0,
- target=PointStamped(
- header=Header(frame_id="map"),
- point=ud.point,
- ),
+ target=ud.pointstamped,
),
- input_keys=["point"],
+ input_keys=["pointstamped"],
)
diff --git a/tasks/receptionist/launch/setup.launch b/tasks/receptionist/launch/setup.launch
index a8fd79ede..894406256 100644
--- a/tasks/receptionist/launch/setup.launch
+++ b/tasks/receptionist/launch/setup.launch
@@ -19,5 +19,4 @@
-
From 09bae511042c4a97b5173feb60e9efa5138bd353 Mon Sep 17 00:00:00 2001
From: Jared Swift
Date: Tue, 28 May 2024 10:06:41 +0100
Subject: [PATCH 3/3] Receptionist fixes 25 04 (#207)
* fix: boolify facial hair attribute.
* fix: drop host attributes.
* feat: use DescribePeople skill (see the sketch after this list).
* fix: update guest str generation.
* refactor: remove redundant message.
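
A small, illustrative sketch of how the reworked GetGuestAttributes consumes
the DescribePeople output (not part of the diff; the attribute keys follow the
diffs below, while the sample values and the guest id are made up):

    import json

    # DescribePeople returns, per detected person, a JSON string under
    # "features"; the guest's attributes are pulled out of it and stored
    # against the guest id, as HandleGuestAttributes does in the diff.
    def extract_attributes(people, guest_data, guest_id):
        if len(people) == 0:
            return "failed"
        guest_data[guest_id]["attributes"] = json.loads(people[0]["features"])[
            "attributes"
        ]
        return "succeeded"

    # Example input, including the now-boolean facial_hair attribute
    # (True unless the raw label is "No_Beard"):
    people = [
        {
            "features": json.dumps(
                {
                    "attributes": {
                        "hair_colour": "brown",
                        "hair_shape": "curly",
                        "facial_hair": False,
                        "glasses": True,
                        "hat": False,
                        "earrings": False,
                        "necklace": False,
                        "necktie": False,
                    }
                }
            )
        }
    ]
    guest_data = {"guest1": {"name": "John", "drink": "beer"}}
    print(extract_attributes(people, guest_data, "guest1"))  # succeeded
    print(guest_data["guest1"]["attributes"]["glasses"])     # True
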
---
.../image_with_masks_and_attributes.py | 2 +-
tasks/receptionist/scripts/main.py | 6 -
.../src/receptionist/states/get_attributes.py | 137 ++++++------------
.../src/receptionist/states/introduce.py | 12 +-
4 files changed, 54 insertions(+), 103 deletions(-)
diff --git a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
index c5df408a4..d74fdb450 100644
--- a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
+++ b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
@@ -195,7 +195,7 @@ def describe(self) -> str:
"hair_colour": hair_colour_str,
"hair_shape": hair_shape_str,
"male": male[0],
- "facial_hair": facial_hair[0],
+ "facial_hair": facial_hair[0] != "No_Beard",
"hat": hat[0],
"glasses": glasses[0],
"earrings": earrings[0],
diff --git a/tasks/receptionist/scripts/main.py b/tasks/receptionist/scripts/main.py
index 808952737..5e9cd28e6 100755
--- a/tasks/receptionist/scripts/main.py
+++ b/tasks/receptionist/scripts/main.py
@@ -33,12 +33,6 @@
{
"name": "John",
"drink": "beer",
- "attributes": {
- "hair_colour": "strawberry blonde",
- "glasses": False,
- "hat": True,
- "height": "tall",
- },
},
)
outcome = receptionist.execute()
diff --git a/tasks/receptionist/src/receptionist/states/get_attributes.py b/tasks/receptionist/src/receptionist/states/get_attributes.py
index 22538a267..2f0dfd3bb 100644
--- a/tasks/receptionist/src/receptionist/states/get_attributes.py
+++ b/tasks/receptionist/src/receptionist/states/get_attributes.py
@@ -1,103 +1,56 @@
-"""
-State for calling the service to get a set of guest attributes.
-Currently incomplete.
-"""
-
-import rospy
import smach
from smach import UserData
-from typing import List, Any, Dict, Union
-from lasr_vision_clip.srv import VqaRequest, VqaResponse, Vqa
+from lasr_skills import DescribePeople
+import json
+
+
+class GetGuestAttributes(smach.StateMachine):
+
+ class HandleGuestAttributes(smach.State):
+ def __init__(self, guest_id: str):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["people", "guest_data"],
+ output_keys=["guest_data"],
+ )
+ self._guest_id: str = guest_id
+
+ def execute(self, userdata: UserData) -> str:
+ if len(userdata.people) == 0:
+ return "failed"
+ userdata.guest_data[self._guest_id]["attributes"] = json.loads(
+ userdata.people[0]["features"]
+ )["attributes"]
+ return "succeeded"
-class GetGuestAttributes(smach.State):
def __init__(
self,
guest_id: str,
- attribute_service: Union[str, None] = None,
- outcomes: List[str] = ["succeeded", "failed"],
- input_keys: List[str] = ["guest_id", "guest_data"],
- output_keys: List[str] = ["guest_data"],
):
- """Calls and parses the service that gets a set of guest attributes.
-
- Args:
- attribute_service (str): Name of the service to call that returns the guest's attributes.
- """
-
- super().__init__(
- outcomes=outcomes,
- input_keys=input_keys,
- output_keys=output_keys,
+ smach.StateMachine.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["guest_id", "guest_data"],
+ output_keys=["guest_data"],
)
- self._service_proxy = rospy.ServiceProxy("/clip_vqa/query_service", Vqa)
self._guest_id: str = guest_id
- self._attribute_service: Union[str, None] = attribute_service
-
- def _call_attribute_service(self):
- # TODO
- pass
-
- def _send_vqa_request(self, possible_answers: List[str]) -> str:
- request = VqaRequest()
- request.possible_answers = possible_answers
- response = self._service_proxy(request)
- return response.answer
-
- def execute(self, userdata: UserData) -> str:
- if self._attribute_service:
- attributes = self._call_attribute_service()
- else:
- glasses_answers = [
- "a person wearing glasses",
- "a person not wearing glasses",
- ]
- hat_answers = ["a person wearing a hat", "a person not wearing a hat"]
- height_answers = ["a tall person", "a short person"]
- hair_colour_answers = [
- "a person with black hair",
- "a person with blonde hair",
- "a person with brown hair",
- "a person with red hair",
- "a person with grey hair",
- "a person with white hair",
- ]
-
- glasses_response = self._send_vqa_request(glasses_answers)
- hat_response = self._send_vqa_request(hat_answers)
- height_response = self._send_vqa_request(height_answers)
- hair_colour_response = self._send_vqa_request(hair_colour_answers)
-
- if glasses_response == "a person wearing glasses":
- glasses = True
- else:
- glasses = False
- if hat_response == "a person wearing a hat":
- hat = True
- else:
- hat = False
- if height_response == "a tall person":
- height = "tall"
- else:
- height = "short"
- if hair_colour_response == "a person with black hair":
- hair_colour = "black"
- elif hair_colour_response == "a person with blonde hair":
- hair_colour = "blonde"
- elif hair_colour_response == "a person with brown hair":
- hair_colour = "brown"
- elif hair_colour_response == "a person with red hair":
- hair_colour = "red"
- elif hair_colour_response == "a person with grey hair":
- hair_colour = "grey"
-
- attributes = {
- "hair_colour": hair_colour,
- "glasses": glasses,
- "hat": hat,
- "height": height,
- }
-
- userdata.guest_data[self._guest_id]["attributes"] = attributes
- return "succeeded"
+ with self:
+ smach.StateMachine.add(
+ "GET_GUEST_ATTRIBUTES",
+ DescribePeople(),
+ transitions={
+ "succeeded": "HANDLE_GUEST_ATTRIBUTES",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_GUEST_ATTRIBUTES",
+ self.HandleGuestAttributes(self._guest_id),
+ transitions={
+ "succeeded": "succeeded",
+ "failed": "failed",
+ },
+ )
diff --git a/tasks/receptionist/src/receptionist/states/introduce.py b/tasks/receptionist/src/receptionist/states/introduce.py
index cedb54740..77dcc96d9 100644
--- a/tasks/receptionist/src/receptionist/states/introduce.py
+++ b/tasks/receptionist/src/receptionist/states/introduce.py
@@ -27,10 +27,14 @@ def stringify_guest_data(guest_data: Dict[str, Any], guest_id: str) -> str:
guest_str = ""
- guest_str += f"{relevant_guest_data['name']}, their favourite drink is {relevant_guest_data['drink']}. "
- guest_str += f"They have {relevant_guest_data['attributes']['hair_colour']} hair, their height is {relevant_guest_data['attributes']['height']}, "
- guest_str += f"they are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, and they are "
- guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}."
+ if "attributes" in relevant_guest_data.keys():
+ guest_str += f"They have {relevant_guest_data['attributes']['hair_shape']} {relevant_guest_data['attributes']['hair_colour']} hair, and they "
+ guest_str += f"{'have facial hair' if relevant_guest_data['attributes']['facial_hair'] else 'do not have facial hair'}. "
+ guest_str += f"They are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, "
+ guest_str += f"{'wearing earrings' if relevant_guest_data['attributes']['earrings'] else 'not wearing earrings'}, and "
+ guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}. "
+ guest_str += f"They are {'wearing a necklace' if relevant_guest_data['attributes']['necklace'] else 'not wearing a necklace'}, and "
+ guest_str += f"{'wearing a necktie' if relevant_guest_data['attributes']['necktie'] else 'not wearing a necktie'}. "
return guest_str