From 953d35c7851b6117aa5b77fa26fb8df9fc26e8c0 Mon Sep 17 00:00:00 2001
From: Matt Barker <105945282+m-barker@users.noreply.github.com>
Date: Tue, 14 May 2024 10:47:43 +0100
Subject: [PATCH] Gesture detection (#202)

* fix: incorrect remapping keys for handover
* fix: remove speech server in launch file
* feat: add find gesture person skill, with support for raised arms
* feat: add pointing to gesture detection
* feat: add debug publisher to gesture detection
* feat: add mimicking of gesture detected by Tiago
* feat: update CML with new pointing API
* feat: update GPSR API with new gesture detection skill
* chore: remove old pointing skill & service
* feat: make debug always be published
---
 .../lasr_vision_pointing/CMakeLists.txt | 213 -----------------
 .../lasr_vision_pointing/images/.gitkeep | 0
 .../vision/lasr_vision_pointing/package.xml | 77 ------
 .../lasr_vision_pointing/requirements.in | 1 -
 .../lasr_vision_pointing/requirements.txt | 24 --
 .../scripts/detect_pointing.py | 175 --------------
 common/vision/lasr_vision_pointing/setup.py | 10 -
 skills/config/motions.yaml | 25 ++
 skills/src/lasr_skills/__init__.py | 1 +
 skills/src/lasr_skills/detect_gesture.py | 97 ++++++--
 skills/src/lasr_skills/detect_pointing.py | 26 ---
 skills/src/lasr_skills/find_gesture_person.py | 221 ++++++++++++++++++
 .../src/carry_my_luggage/state_machine.py | 31 ++-
 tasks/gpsr/config/lab.yaml | 8 +-
 tasks/gpsr/src/gpsr/state_machine_factory.py | 26 ++-
 15 files changed, 381 insertions(+), 554 deletions(-)
 delete mode 100644 common/vision/lasr_vision_pointing/CMakeLists.txt
 delete mode 100644 common/vision/lasr_vision_pointing/images/.gitkeep
 delete mode 100644 common/vision/lasr_vision_pointing/package.xml
 delete mode 100644 common/vision/lasr_vision_pointing/requirements.in
 delete mode 100644 common/vision/lasr_vision_pointing/requirements.txt
 delete mode 100644 common/vision/lasr_vision_pointing/scripts/detect_pointing.py
 delete mode 100644 common/vision/lasr_vision_pointing/setup.py
 delete mode 100644 skills/src/lasr_skills/detect_pointing.py
 create mode 100644 skills/src/lasr_skills/find_gesture_person.py

diff --git a/common/vision/lasr_vision_pointing/CMakeLists.txt b/common/vision/lasr_vision_pointing/CMakeLists.txt
deleted file mode 100644
index 66f1cf831..000000000
--- a/common/vision/lasr_vision_pointing/CMakeLists.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-cmake_minimum_required(VERSION 3.0.2)
-project(lasr_vision_pointing)
-
-## Compile as C++11, supported in ROS Kinetic and newer
-# add_compile_options(-std=c++11)
-
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(catkin REQUIRED COMPONENTS
-  geometry_msgs
-  sensor_msgs
-  roscpp
-  rospy
-  std_msgs
-  catkin_virtualenv
-)
-
-## System dependencies are found with CMake's conventions
-# find_package(Boost REQUIRED COMPONENTS system)
-
-
-## Uncomment this if the package has a setup.py.
This macro ensures -## modules and global scripts declared therein get installed -## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html -catkin_python_setup() -catkin_generate_virtualenv( - INPUT_REQUIREMENTS requirements.in - PYTHON_INTERPRETER python3.10 -) - -################################################ -## Declare ROS messages, services and actions ## -################################################ - -## To declare and build messages, services or actions from within this -## package, follow these steps: -## * Let MSG_DEP_SET be the set of packages whose message types you use in -## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). -## * In the file package.xml: -## * add a build_depend tag for "message_generation" -## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET -## * If MSG_DEP_SET isn't empty the following dependency has been pulled in -## but can be declared for certainty nonetheless: -## * add a exec_depend tag for "message_runtime" -## * In this file (CMakeLists.txt): -## * add "message_generation" and every package in MSG_DEP_SET to -## find_package(catkin REQUIRED COMPONENTS ...) -## * add "message_runtime" and every package in MSG_DEP_SET to -## catkin_package(CATKIN_DEPENDS ...) -## * uncomment the add_*_files sections below as needed -## and list every .msg/.srv/.action file to be processed -## * uncomment the generate_messages entry below -## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) - -## Generate messages in the 'msg' folder -# add_message_files( -# FILES -# Message1.msg -# Message2.msg -# ) - -## Generate services in the 'srv' folder -# add_service_files( -# FILES -# PointingDirection.srv -# ) - -## Generate actions in the 'action' folder -# add_action_files( -# FILES -# Action1.action -# Action2.action -# ) - -## Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# geometry_msgs -# std_msgs -# sensor_msgs -# ) - -################################################ -## Declare ROS dynamic reconfigure parameters ## -################################################ - -## To declare and build dynamic reconfigure parameters within this -## package, follow these steps: -## * In the file package.xml: -## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" -## * In this file (CMakeLists.txt): -## * add "dynamic_reconfigure" to -## find_package(catkin REQUIRED COMPONENTS ...) 
-## * uncomment the "generate_dynamic_reconfigure_options" section below -## and list every .cfg file to be processed - -## Generate dynamic reconfigure parameters in the 'cfg' folder -# generate_dynamic_reconfigure_options( -# cfg/DynReconf1.cfg -# cfg/DynReconf2.cfg -# ) - -################################### -## catkin specific configuration ## -################################### -## The catkin_package macro generates cmake config files for your package -## Declare things to be passed to dependent projects -## INCLUDE_DIRS: uncomment this if your package contains header files -## LIBRARIES: libraries you create in this project that dependent projects also need -## CATKIN_DEPENDS: catkin_packages dependent projects also need -## DEPENDS: system dependencies of this project that dependent projects also need -catkin_package( -# INCLUDE_DIRS include -# LIBRARIES lasr_vision_pointing - CATKIN_DEPENDS geometry_msgs roscpp rospy std_msgs sensor_msgs -# DEPENDS system_lib -) - -########### -## Build ## -########### - -## Specify additional locations of header files -## Your package locations should be listed before other locations -include_directories( -# include - ${catkin_INCLUDE_DIRS} -) - -## Declare a C++ library -# add_library(${PROJECT_NAME} -# src/${PROJECT_NAME}/lasr_vision_pointing.cpp -# ) - -## Add cmake target dependencies of the library -## as an example, code may need to be generated before libraries -## either from message generation or dynamic reconfigure -# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Declare a C++ executable -## With catkin_make all packages are built within a single CMake context -## The recommended prefix ensures that target names across packages don't collide -# add_executable(${PROJECT_NAME}_node src/lasr_vision_pointing_node.cpp) - -## Rename C++ executable without prefix -## The above recommended prefix causes long target names, the following renames the -## target back to the shorter version for ease of user use -## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" -# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") - -## Add cmake target dependencies of the executable -## same as for the library above -# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Specify libraries to link a library or executable target against -# target_link_libraries(${PROJECT_NAME}_node -# ${catkin_LIBRARIES} -# ) - -############# -## Install ## -############# - -# all install targets should use catkin DESTINATION variables -# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html - -## Mark executable scripts (Python etc.) 
for installation -## in contrast to setup.py, you can choose the destination -catkin_install_python(PROGRAMS - scripts/detect_pointing.py - DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -) - -## Mark executables for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html -# install(TARGETS ${PROJECT_NAME}_node -# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -# ) - -## Mark libraries for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html -# install(TARGETS ${PROJECT_NAME} -# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} -# ) - -## Mark cpp header files for installation -# install(DIRECTORY include/${PROJECT_NAME}/ -# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} -# FILES_MATCHING PATTERN "*.h" -# PATTERN ".svn" EXCLUDE -# ) - -## Mark other files for installation (e.g. launch and bag files, etc.) -install(FILES - requirements.txt - DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} -) - -############# -## Testing ## -############# - -## Add gtest based cpp test target and link libraries -# catkin_add_gtest(${PROJECT_NAME}-test test/test_lasr_vision_pointing.cpp) -# if(TARGET ${PROJECT_NAME}-test) -# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) -# endif() - -## Add folders to be run by python nosetests -# catkin_add_nosetests(test) diff --git a/common/vision/lasr_vision_pointing/images/.gitkeep b/common/vision/lasr_vision_pointing/images/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/common/vision/lasr_vision_pointing/package.xml b/common/vision/lasr_vision_pointing/package.xml deleted file mode 100644 index d99ef49f8..000000000 --- a/common/vision/lasr_vision_pointing/package.xml +++ /dev/null @@ -1,77 +0,0 @@ - - - lasr_vision_pointing - 0.0.0 - The lasr_vision_pointing package - - - - - Serge Alkhalil - - - - - - TODO - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - catkin - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - geometry_msgs - roscpp - rospy - std_msgs - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - lasr_vision_msgs - - - catkin_virtualenv - - - - - requirements.txt - - - diff --git a/common/vision/lasr_vision_pointing/requirements.in b/common/vision/lasr_vision_pointing/requirements.in deleted file mode 100644 index c1b708709..000000000 --- a/common/vision/lasr_vision_pointing/requirements.in +++ /dev/null @@ -1 +0,0 @@ -mediapipe==0.10.10 \ No newline at end of file diff --git a/common/vision/lasr_vision_pointing/requirements.txt b/common/vision/lasr_vision_pointing/requirements.txt deleted file mode 100644 index e6ef834ab..000000000 --- a/common/vision/lasr_vision_pointing/requirements.txt +++ /dev/null @@ -1,24 +0,0 @@ -absl-py==2.1.0 # via mediapipe -attrs==23.2.0 # via mediapipe -cffi==1.16.0 # via sounddevice -contourpy==1.2.1 # via matplotlib -cycler==0.12.1 # via matplotlib -flatbuffers==24.3.25 # via mediapipe -fonttools==4.51.0 # via matplotlib -jax==0.4.26 # via mediapipe -kiwisolver==1.4.5 # via matplotlib -matplotlib==3.8.4 # via mediapipe -mediapipe==0.10.10 # via -r requirements.in -ml-dtypes==0.4.0 # via jax -numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy -opencv-contrib-python==4.9.0.80 # via mediapipe -opt-einsum==3.3.0 # via jax -packaging==24.0 # via matplotlib -pillow==10.3.0 # via 
matplotlib -protobuf==3.20.3 # via mediapipe -pycparser==2.22 # via cffi -pyparsing==3.1.2 # via matplotlib -python-dateutil==2.9.0.post0 # via matplotlib -scipy==1.13.0 # via jax -six==1.16.0 # via python-dateutil -sounddevice==0.4.6 # via mediapipe diff --git a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py b/common/vision/lasr_vision_pointing/scripts/detect_pointing.py deleted file mode 100644 index b56e0d14e..000000000 --- a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 - -import os -import cv2 -import rospkg -import rospy -import mediapipe as mp -from cv_bridge import CvBridge -from lasr_vision_msgs.srv import ( - YoloDetectionRequest, - YoloDetection, - PointingDirection, - PointingDirectionResponse, - PointingDirectionRequest, -) - - -class PointingDetector: - def __init__(self): - self.detect_service = rospy.ServiceProxy("/yolov8/detect", YoloDetection) - self.service = rospy.Service( - "/pointing_detection_service", PointingDirection, self.excute - ) - self.counter = 0 - - # Load MediaPipe Pose model - self.mp_pose = mp.solutions.pose - self.pose = self.mp_pose.Pose() - - def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse: - img = req.image_raw - resp = PointingDirectionResponse() - people_bounding_box = self.detection(img) - - if people_bounding_box: - for person in people_bounding_box: - keypoints = self.detect_keypoints( - img - ) # Detect keypoints using MediaPipe - direction = self.determine_pointing_direction( - keypoints, buffer_width=25 - ) - rospy.loginfo(f"Person detected pointing: {direction}") - - # Visualize pointing direction with landmarks - image_path = os.path.join( - rospkg.RosPack().get_path("lasr_vision_pointing"), - "images", - f"image{self.counter}.jpg", - ) - self.visualize_pointing_direction_with_landmarks( - image_path, person, direction, keypoints - ) - - resp.direction = direction - else: - resp.direction = "NONE" - - self.counter += 1 - return resp - - def store_image(self, img): - package_path = rospkg.RosPack().get_path("lasr_vision_pointing") - os.chdir(os.path.abspath(os.path.join(package_path, "images"))) - cv2.imwrite(f"image{self.counter}.jpg", img) - - def detect_keypoints(self, img): - img_bridge = CvBridge().imgmsg_to_cv2(img, desired_encoding="passthrough") - img_rgb = cv2.cvtColor(img_bridge, cv2.COLOR_BGR2RGB) - - # store the image - self.store_image(img_rgb) - - results = self.pose.process(img_rgb) - - keypoints = [] - if results.pose_landmarks: - for landmark in results.pose_landmarks.landmark: - x = int(landmark.x * img.width) - y = int(landmark.y * img.height) - keypoints.append((x, y)) - - return keypoints - - def detection(self, img): - request = YoloDetectionRequest() - request.image_raw = img # sensor_msgs/Image - request.dataset = "yolov8n-seg.pt" # YOLOv8 model, auto-downloads - request.confidence = 0.7 # minimum confidence to include in results - request.nms = 0.4 # non maximal supression - - # send request - response = self.detect_service(request) - - result = [] - for detection in response.detected_objects: - if detection.name == "person": - # cords of person in image - result.append(detection.xywh) - - return result - - def determine_pointing_direction(self, keypoints, buffer_width=50): - # Ensure keypoints are available - if ( - len(keypoints) >= 7 - ): # Ensure we have at least 7 keypoints for the upper body - # Extract relevant keypoints for shoulders and wrists - left_shoulder = 
keypoints[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value] - right_shoulder = keypoints[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value] - left_wrist = keypoints[self.mp_pose.PoseLandmark.LEFT_WRIST.value] - right_wrist = keypoints[self.mp_pose.PoseLandmark.RIGHT_WRIST.value] - - # Ensure all keypoints are detected - if left_shoulder and right_shoulder and left_wrist and right_wrist: - # Calculate the x-coordinate difference between shoulders and wrists - left_diff = left_wrist[0] - left_shoulder[0] - right_diff = right_shoulder[0] - right_wrist[0] - - # Determine pointing direction based on the difference in x-coordinates - if abs(left_diff - right_diff) < buffer_width: - return "FORWARDS" - elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff): - return "LEFT" if left_diff > 0 else "RIGHT" - elif abs(right_diff) > buffer_width and abs(right_diff) > abs( - left_diff - ): - return "RIGHT" if right_diff > 0 else "LEFT" - - # Default: Determine direction based on the relative position to the center of the image - return "NONE" - - def visualize_pointing_direction_with_landmarks( - self, image_path, person_bbox, pointing_direction, keypoints - ): - # Load image - img = cv2.imread(image_path) - - # Extract person bbox coordinates - x, y, w, h = person_bbox - # Calculate center of bbox - center_x = x + w // 2 - center_y = y + h // 2 - - # Calculate endpoint of arrow based on pointing direction - arrow_length = min(w, h) // 2 - if pointing_direction == "LEFT": - endpoint = (center_x - arrow_length, center_y) - elif pointing_direction == "RIGHT": - endpoint = (center_x + arrow_length, center_y) - elif pointing_direction == "FORWARDS": - endpoint = (center_x, center_y) - else: - return # No pointing direction detected - - # Draw arrow on image - color = (0, 255, 0) # Green color - thickness = 2 - cv2.arrowedLine(img, (center_x, center_y), endpoint, color, thickness) - - # Draw landmarks (keypoints) on the image - for keypoint in keypoints: - if keypoint: - cv2.circle(img, keypoint, 3, (0, 0, 255), -1) # Red color for landmarks - - # Store image with pointing direction visualization and landmarks - output_image_path = f"landmark_image{self.counter}.jpg" # Change the output image path as needed - cv2.imwrite(output_image_path, img) - - -if __name__ == "__main__": - rospy.init_node("pointing_detector") - pointer = PointingDetector() - rospy.loginfo("Pointing Detector is running") - rospy.spin() diff --git a/common/vision/lasr_vision_pointing/setup.py b/common/vision/lasr_vision_pointing/setup.py deleted file mode 100644 index e094797e8..000000000 --- a/common/vision/lasr_vision_pointing/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python3 - -from distutils.core import setup -from catkin_pkg.python_setup import generate_distutils_setup - -setup_args = generate_distutils_setup( - packages=["lasr_vision_pointing"], package_dir={"": "src"} -) - -setup(**setup_args) diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml index 302eb6083..2b98cc74e 100644 --- a/skills/config/motions.yaml +++ b/skills/config/motions.yaml @@ -39,4 +39,29 @@ play_motion: joints: [head_1_joint, head_2_joint] points: - positions: [0.0, 0.0] + time_from_start: 0.0 + raise_torso: + joints: [torso_lift_joint] + points: + - positions: [0.5] + time_from_start: 0.0 + pointing_to_the_right: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.70, -1.47, 0.38, 1.92, 1.66, 0.42, 0.08] + time_from_start: 0.0 + 
pointing_to_the_left: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.32, -1.42, -0.39, 2.05, 1.62, 0.56, 0.09] + time_from_start: 0.0 + raising_right_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.12, 0.09, -3.16, 1.65, 1.53, 0.00, 0.13] + time_from_start: 0.0 + raising_left_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13] time_from_start: 0.0 \ No newline at end of file diff --git a/skills/src/lasr_skills/__init__.py b/skills/src/lasr_skills/__init__.py index 736ee4823..4bceb19ca 100755 --- a/skills/src/lasr_skills/__init__.py +++ b/skills/src/lasr_skills/__init__.py @@ -21,3 +21,4 @@ from .recognise import Recognise from .detect_pointing import DetectPointingDirection from .detect_gesture import DetectGesture +from .find_gesture_person import FindGesturePerson diff --git a/skills/src/lasr_skills/detect_gesture.py b/skills/src/lasr_skills/detect_gesture.py index 5ac565366..28d4f84cb 100755 --- a/skills/src/lasr_skills/detect_gesture.py +++ b/skills/src/lasr_skills/detect_gesture.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 +from typing import Optional import smach import rospy +import cv2 +import cv2_img from lasr_skills.vision import GetImage +from lasr_skills import PlayMotion from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest from lasr_vision_msgs.msg import BodyPixMaskRequest +from sensor_msgs.msg import Image class DetectGesture(smach.State): @@ -11,16 +16,25 @@ class DetectGesture(smach.State): State for detecting gestures. """ - def __init__(self, gesture_to_detect: str = "raising_left_hand"): - self.gesture_to_detect = gesture_to_detect + def __init__( + self, + gesture_to_detect: Optional[str] = None, + buffer_width: int = 50, + ): + """Optionally stores the gesture to detect. 
If None, it will infer the gesture from the keypoints."""
         smach.State.__init__(
             self,
             outcomes=["succeeded", "failed"],
             input_keys=["img_msg"],
             output_keys=["gesture_detected"],
         )
-
+        self.gesture_to_detect = gesture_to_detect
         self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)
+        self.debug_publisher = rospy.Publisher(
+            "/gesture_detection/debug", Image, queue_size=1
+        )
+        self.buffer_width = buffer_width
 
     def execute(self, userdata):
 
@@ -54,29 +68,67 @@ def execute(self, userdata):
                     "y": keypoint.xy[1],
                     "score": keypoint.score,
                 }
-
-        if self.gesture_to_detect == "raising_left_hand":
+        if (
+            self.gesture_to_detect == "raising_left_arm"
+            or self.gesture_to_detect is None
+        ):
             if part_info["leftWrist"]["y"] < part_info["leftShoulder"]["y"]:
-                userdata.gesture_detected = True
-            else:
-                userdata.gesture_detected = False
-        elif self.gesture_to_detect == "raising_right_hand":
+                self.gesture_to_detect = "raising_left_arm"
+        if (
+            self.gesture_to_detect == "raising_right_arm"
+            or self.gesture_to_detect is None
+        ):
             if part_info["rightWrist"]["y"] < part_info["rightShoulder"]["y"]:
-                userdata.gesture_detected = True
-            else:
-                userdata.gesture_detected = False
-        else:
-            raise ValueError("Invalid gesture to detect")
+                self.gesture_to_detect = "raising_right_arm"
+        if (
+            self.gesture_to_detect == "pointing_to_the_left"
+            or self.gesture_to_detect is None
+        ):
+            if (
+                part_info["leftWrist"]["x"] - self.buffer_width
+                > part_info["leftShoulder"]["x"]
+            ):
+                self.gesture_to_detect = "pointing_to_the_left"
+        if (
+            self.gesture_to_detect == "pointing_to_the_right"
+            or self.gesture_to_detect is None
+        ):
+            if (
+                part_info["rightShoulder"]["x"] - self.buffer_width
+                > part_info["rightWrist"]["x"]
+            ):
+                self.gesture_to_detect = "pointing_to_the_right"
+
+        if self.gesture_to_detect is None:
+            self.gesture_to_detect = "none"
+
+        userdata.gesture_detected = self.gesture_to_detect
+
+        cv2_gesture_img = cv2_img.msg_to_cv2_img(userdata.img_msg)
+        # Add text to the image
+        cv2.putText(
+            cv2_gesture_img,
+            self.gesture_to_detect,
+            (10, 30),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 255, 0),
+            2,
+            cv2.LINE_AA,
+        )
+        # Publish the image
+        self.debug_publisher.publish(cv2_img.cv2_img_to_msg(cv2_gesture_img))
 
         return "succeeded"
 
 
+### For example usage:
 class GestureDetectionSM(smach.StateMachine):
     """
     State machine for detecting gestures.
""" - def __init__(self, gesture_to_detect: str = "raising_left_hand"): + def __init__(self, gesture_to_detect: Optional[str] = None): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) self.gesture_to_detect = gesture_to_detect with self: @@ -95,9 +147,22 @@ def __init__(self, gesture_to_detect: str = "raising_left_hand"): if __name__ == "__main__": rospy.init_node("gesture_detection_sm") + ### Example usage: while not rospy.is_shutdown(): sm = GestureDetectionSM() sm.execute() - print("Raising Left Hand Detected:", sm.userdata.gesture_detected) + gesture_state = PlayMotion(motion_name=sm.userdata.gesture_detected) + gesture_sm = smach.StateMachine(outcomes=["succeeded", "failed"]) + with gesture_sm: + smach.StateMachine.add( + "GESTURE_STATE", + gesture_state, + transitions={ + "succeeded": "succeeded", + "aborted": "failed", + "preempted": "failed", + }, + ) + gesture_sm.execute() rospy.spin() diff --git a/skills/src/lasr_skills/detect_pointing.py b/skills/src/lasr_skills/detect_pointing.py deleted file mode 100644 index 21895dfc1..000000000 --- a/skills/src/lasr_skills/detect_pointing.py +++ /dev/null @@ -1,26 +0,0 @@ -import rospy -import smach -from sensor_msgs.msg import Image - -from lasr_vision_msgs.srv import PointingDirection - - -class DetectPointingDirection(smach.State): - def __init__(self, image_topic: str = "/xtion/rgb/image_raw"): - smach.State.__init__( - self, outcomes=["succeeded", "failed"], output_keys=["pointing_direction"] - ) - self._image_topic = image_topic - self._pointing_service = rospy.ServiceProxy( - "/pointing_detection_service", PointingDirection - ) - self._pointing_service.wait_for_service() - - def execute(self, userdata): - - img_msg = rospy.wait_for_message(self._image_topic, Image) - resp = self._pointing_service(img_msg) - if resp.direction == "NONE" or resp.direction == "FORWARDS": - return "failed" - userdata.pointing_direction = resp.direction - return "succeeded" diff --git a/skills/src/lasr_skills/find_gesture_person.py b/skills/src/lasr_skills/find_gesture_person.py new file mode 100644 index 000000000..ae195494a --- /dev/null +++ b/skills/src/lasr_skills/find_gesture_person.py @@ -0,0 +1,221 @@ +import smach +import rospy + +from lasr_skills import Detect3D, GoToLocation, DetectGesture, PlayMotion +from lasr_skills.vision import GetImage +import navigation_helpers + +from geometry_msgs.msg import Pose, PoseWithCovarianceStamped, Point, Quaternion + + +from typing import List, Union + + +class FindGesturePerson(smach.StateMachine): + class GetLocation(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["location_index", "waypoints"], + output_keys=["location"], + ) + + def execute(self, userdata): + userdata.location = userdata.waypoints[userdata.location_index] + return "succeeded" + + class GetPose(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + output_keys=["current_pose"], + ) + + def execute(self, userdata): + userdata.current_pose = rospy.wait_for_message( + "/amcl_pose", PoseWithCovarianceStamped + ).pose.pose + return "succeeded" + + class ComputePath(smach.State): + def __init__(self, waypoints: List[Pose]): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["current_pose"], + output_keys=["waypoints"], + ) + self._waypoints = waypoints + + def execute(self, userdata): + userdata.waypoints = navigation_helpers.min_hamiltonian_path( + userdata.current_pose, 
self._waypoints
+            )
+            return "succeeded"
+
+    class HandleDetections(smach.State):
+        def __init__(self):
+            smach.State.__init__(
+                self,
+                outcomes=["succeeded", "failed"],
+                input_keys=["detections_3d"],
+                output_keys=["person_point"],
+            )
+
+        def execute(self, userdata):
+            if len(userdata.detections_3d.detected_objects) == 0:
+                rospy.logwarn("No person detected, returning failed.")
+                return "failed"
+            userdata.person_point = userdata.detections_3d.detected_objects[0].point
+            return "succeeded"
+
+    class HandleResponse(smach.State):
+        def __init__(self):
+            smach.State.__init__(
+                self,
+                outcomes=["succeeded", "failed"],
+                input_keys=["gesture_detected"],
+            )
+
+        def execute(self, userdata):
+            if userdata.gesture_detected:
+                return "succeeded"
+            return "failed"
+
+    def __init__(
+        self,
+        gesture: str,
+        waypoints: Union[List[Pose], None] = None,
+        location_param: Union[str, None] = None,
+    ):
+        smach.StateMachine.__init__(
+            self, outcomes=["succeeded", "failed"], output_keys=["person_point"]
+        )
+        if waypoints is None and location_param is None:
+            raise ValueError("Either waypoints or location_param must be provided")
+
+        if gesture == "person raising their left arm":
+            self.gesture = "raising_left_arm"
+        elif gesture == "person raising their right arm":
+            self.gesture = "raising_right_arm"
+        elif gesture == "person pointing to the right":
+            self.gesture = "pointing_to_the_right"
+        elif gesture == "person pointing to the left":
+            self.gesture = "pointing_to_the_left"
+        else:
+            raise ValueError(f"Gesture {gesture} not recognized")
+
+        if waypoints is None:
+            waypoints_to_iterate: List[Pose] = []
+
+            room = rospy.get_param(location_param)
+            beacons = room["beacons"]
+            for beacon in beacons:
+                waypoint: Pose = Pose(
+                    position=Point(**beacons[beacon]["near_pose"]["position"]),
+                    orientation=Quaternion(
+                        **beacons[beacon]["near_pose"]["orientation"]
+                    ),
+                )
+                waypoints_to_iterate.append(waypoint)
+        else:
+            waypoints_to_iterate: List[Pose] = waypoints
+
+        with self:
+            smach.StateMachine.add(
+                "GET_POSE",
+                self.GetPose(),
+                transitions={"succeeded": "COMPUTE_PATH", "failed": "failed"},
+            )
+
+            smach.StateMachine.add(
+                "COMPUTE_PATH",
+                self.ComputePath(waypoints_to_iterate),
+                transitions={"succeeded": "WAYPOINT_ITERATOR", "failed": "failed"},
+            )
+
+            waypoint_iterator = smach.Iterator(
+                outcomes=["succeeded", "failed"],
+                it=lambda: range(len(waypoints_to_iterate)),
+                it_label="location_index",
+                input_keys=["waypoints"],
+                output_keys=["person_point"],
+                exhausted_outcome="failed",
+            )
+
+            with waypoint_iterator:
+                container_sm = smach.StateMachine(
+                    outcomes=["succeeded", "failed", "continue"],
+                    input_keys=["location_index", "waypoints"],
+                    output_keys=["person_point"],
+                )
+
+                with container_sm:
+                    smach.StateMachine.add(
+                        "GET_LOCATION",
+                        self.GetLocation(),
+                        transitions={"succeeded": "GO_TO_LOCATION", "failed": "failed"},
+                    )
+
+                    smach.StateMachine.add(
+                        "GO_TO_LOCATION",
+                        GoToLocation(),
+                        transitions={
+                            "succeeded": "DETECT3D",
+                            "failed": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "DETECT3D",
+                        Detect3D(filter=["person"]),
+                        transitions={
+                            "succeeded": "HANDLE_DETECTIONS",
+                            "failed": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "HANDLE_DETECTIONS",
+                        self.HandleDetections(),
+                        transitions={
+                            "succeeded": "PLAY_MOTION",
+                            "failed": "continue",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "PLAY_MOTION",
+                        PlayMotion(motion_name="raise_torso"),
+                        transitions={
+                            "succeeded": "GET_IMAGE",
+                            "aborted": "failed",
+                            "preempted": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+ "GET_IMAGE", + GetImage(), + transitions={"succeeded": "CHECK_GESTURE", "failed": "failed"}, + ) + smach.StateMachine.add( + "CHECK_GESTURE", + DetectGesture(self.gesture), + transitions={ + "succeeded": "HANDLE_RESPONSE", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "HANDLE_RESPONSE", + self.HandleResponse(), + transitions={"succeeded": "succeeded", "failed": "continue"}, + ) + + waypoint_iterator.set_contained_state( + "CONTAINER_STATE", container_sm, loop_outcomes=["continue"] + ) + smach.StateMachine.add( + "WAYPOINT_ITERATOR", + waypoint_iterator, + {"succeeded": "succeeded", "failed": "failed"}, + ) diff --git a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py index 39cd67ce5..00c743c20 100644 --- a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py +++ b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py @@ -4,7 +4,7 @@ WaitForPerson, DetectPointingDirection, Say, - PlayMotion, + DetectGesture, ReceiveObject, ) @@ -13,6 +13,24 @@ class CarryMyLuggage(smach.StateMachine): + class ProcessPointingDirection(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["gesture_detected"], + output_keys=["pointing_direction"], + ) + + def execute(self, userdata): + if userdata.gesture_detected == "pointing_to_the_left": + userdata.pointing_direction = "left" + elif userdata.gesture_detected == "pointing_to_the_right": + userdata.pointing_direction = "right" + else: + return "failed" + return "succeeded" + def __init__(self): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) @@ -28,7 +46,16 @@ def __init__(self): smach.StateMachine.add( "DETECT_POINTING_DIRECTION", - DetectPointingDirection(), + DetectGesture(), + transitions={ + "succeeded": "PROCESS_POINTING_DIRECTION", + "failed": "SAY_FAILED_POINTING", + }, + ) + + smach.StateMachine.add( + "PROCESS_POINTING_DIRECTION", + CarryMyLuggage.ProcessPointingDirection(), transitions={ "succeeded": "SAY_BAG", "failed": "SAY_FAILED_POINTING", diff --git a/tasks/gpsr/config/lab.yaml b/tasks/gpsr/config/lab.yaml index 96cd8cd52..f62e2c4c8 100644 --- a/tasks/gpsr/config/lab.yaml +++ b/tasks/gpsr/config/lab.yaml @@ -9,8 +9,8 @@ arena: beacons: tv_desk: near_pose: - position: {x: 0.96, y: 0.82, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: 0.76, w: 0.65} + position: {x: 0.989603867147269, y: 0.47950265241289325, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: 0.738945375913055, w: 0.6737653385391044} near_polygon: [[-0.11, 0.00], [-0.08, 2.36], [1.84, 2.75], [2.02, 1.31]] search_pose: position: {x: 0.97, y: 1.60, z: 0.0} @@ -18,8 +18,8 @@ arena: search_polygon: [[0.40, 1.92], [0.31, 2.45], [1.36, 2.70], [1.456, 2.17]] window_desk: near_pose: - position: {x: 2.38, y: -3.0, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: -0.62, w: 0.79} + position: {x: 2.6771759491107954, y: -2.2998999761206105, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: -0.6148810103509047, w: 0.7886198977389871} near_polygon: [[3.34, -3.27], [3.75, -4.80], [2.47, -5.20], [1.75, -3.58]] search_pose: position: {x: 2.47, y: -3.73, z: 0.0} diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py index 0208686dd..2e005919b 100644 --- a/tasks/gpsr/src/gpsr/state_machine_factory.py +++ b/tasks/gpsr/src/gpsr/state_machine_factory.py @@ -2,7 +2,7 @@ import rospy import smach from typing import Dict, List -from lasr_skills import GoToLocation, FindNamedPerson # type: ignore 
+from lasr_skills import GoToLocation, FindNamedPerson, FindGesturePerson # type: ignore from gpsr.states import Talk @@ -43,11 +43,25 @@ def _handle_find_command(self, command_param: Dict): def _handle_talk_command(self, command_param: Dict): if "gesture" in command_param: - raise NotImplementedError("Talk command with gesture not implemented") - elif "text" in command_param: + if not "location" in command_param: + raise ValueError( + "Talk command with gesture but no room in command parameters" + ) + location_param_room = f"/gpsr/arena/rooms/{command_param['location']}" + self.sm.add( + f"STATE_{self._increment_state_count()}", + FindGesturePerson( + location_param=location_param_room, gesture=command_param["gesture"] + ), + transitions={ + "succeeded": f"STATE_{self.state_count + 1}", + "failed": "failed", + }, + ) + if "talk" in command_param: self.sm.add( f"STATE_{self._increment_state_count()}", - Talk(text=command_param["text"]), + Talk(talk_phrase=command_param["talk"]), transitions={ "succeeded": f"STATE_{self.state_count + 1}", "failed": "failed", @@ -143,8 +157,8 @@ def build_state_machine(self) -> smach.StateMachine: raise ValueError(f"Invalid command verb: {command_verb}") self.sm.add( - f"STATE_{self.state_count}", - Talk(text="I have completed the task."), + f"STATE_{self.state_count+1}", + Talk(talk_phrase="I have completed the task."), transitions={"succeeded": "succeeded", "failed": "failed"}, )
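For reference, the keypoint rules that the new DetectGesture skill applies in skills/src/lasr_skills/detect_gesture.py above reduce to four comparisons on BodyPix keypoints. The sketch below restates them as a standalone, ROS-free function so the thresholds are easy to see; classify_gesture and its sample input are hypothetical names used only for illustration, not part of the patch. Coordinates are image pixels, so a smaller y means higher in the frame.

from typing import Dict, Optional, Tuple

Point = Tuple[float, float]  # (x, y) in image pixels; y grows downwards


def classify_gesture(
    keypoints: Dict[str, Point], buffer_width: int = 50
) -> Optional[str]:
    """Return one of the motion names added to skills/config/motions.yaml,
    or None when no gesture is recognised."""
    # Raised arm: the wrist keypoint sits above the shoulder (smaller y).
    if keypoints["leftWrist"][1] < keypoints["leftShoulder"][1]:
        return "raising_left_arm"
    if keypoints["rightWrist"][1] < keypoints["rightShoulder"][1]:
        return "raising_right_arm"
    # Pointing: the wrist is displaced sideways past the shoulder by more than
    # buffer_width pixels, mirroring the checks in DetectGesture.execute.
    if keypoints["leftWrist"][0] - buffer_width > keypoints["leftShoulder"][0]:
        return "pointing_to_the_left"
    if keypoints["rightShoulder"][0] - buffer_width > keypoints["rightWrist"][0]:
        return "pointing_to_the_right"
    return None


if __name__ == "__main__":
    # Left wrist well above the left shoulder -> raised left arm.
    sample = {
        "leftWrist": (310.0, 120.0),
        "leftShoulder": (300.0, 240.0),
        "rightWrist": (200.0, 300.0),
        "rightShoulder": (220.0, 240.0),
    }
    print(classify_gesture(sample))  # prints: raising_left_arm

As in the skill, the checks run in a fixed priority order (raised arms before pointing), and the returned name matches the motions added in skills/config/motions.yaml, so it can be passed straight to PlayMotion to mimic the detected gesture.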