From 953d35c7851b6117aa5b77fa26fb8df9fc26e8c0 Mon Sep 17 00:00:00 2001
From: Matt Barker <105945282+m-barker@users.noreply.github.com>
Date: Tue, 14 May 2024 10:47:43 +0100
Subject: [PATCH] Gesture detection (#202)

* fix: incorrect remapping keys for handover
* fix: remove speech server in launch file
* feat: add find gesture person skill, with support for raised arms
* feat: add pointing to gesture detection
* feat: add debug publisher to gesture detection
* feat: add mimicking of gesture detected by Tiago
* feat: update CML with new pointing API
* feat: update GPSR API with new gesture detection skill
* chore: remove old pointing skill & service
* feat: make debug always be published
---
 .../lasr_vision_pointing/CMakeLists.txt | 213 -----------------
 .../lasr_vision_pointing/images/.gitkeep | 0
 .../vision/lasr_vision_pointing/package.xml | 77 ------
 .../lasr_vision_pointing/requirements.in | 1 -
 .../lasr_vision_pointing/requirements.txt | 24 --
 .../scripts/detect_pointing.py | 175 --------------
 common/vision/lasr_vision_pointing/setup.py | 10 -
 skills/config/motions.yaml | 25 ++
 skills/src/lasr_skills/__init__.py | 1 +
 skills/src/lasr_skills/detect_gesture.py | 97 ++++++--
 skills/src/lasr_skills/detect_pointing.py | 26 ---
 skills/src/lasr_skills/find_gesture_person.py | 221 ++++++++++++++++++
 .../src/carry_my_luggage/state_machine.py | 31 ++-
 tasks/gpsr/config/lab.yaml | 8 +-
 tasks/gpsr/src/gpsr/state_machine_factory.py | 26 ++-
 15 files changed, 381 insertions(+), 554 deletions(-)
 delete mode 100644 common/vision/lasr_vision_pointing/CMakeLists.txt
 delete mode 100644 common/vision/lasr_vision_pointing/images/.gitkeep
 delete mode 100644 common/vision/lasr_vision_pointing/package.xml
 delete mode 100644 common/vision/lasr_vision_pointing/requirements.in
 delete mode 100644 common/vision/lasr_vision_pointing/requirements.txt
 delete mode 100644 common/vision/lasr_vision_pointing/scripts/detect_pointing.py
 delete mode 100644 common/vision/lasr_vision_pointing/setup.py
 delete mode 100644 skills/src/lasr_skills/detect_pointing.py
 create mode 100644 skills/src/lasr_skills/find_gesture_person.py

diff --git a/common/vision/lasr_vision_pointing/CMakeLists.txt b/common/vision/lasr_vision_pointing/CMakeLists.txt
deleted file mode 100644
index 66f1cf831..000000000
--- a/common/vision/lasr_vision_pointing/CMakeLists.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-cmake_minimum_required(VERSION 3.0.2)
-project(lasr_vision_pointing)
-
-## Compile as C++11, supported in ROS Kinetic and newer
-# add_compile_options(-std=c++11)
-
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(catkin REQUIRED COMPONENTS
-  geometry_msgs
-  sensor_msgs
-  roscpp
-  rospy
-  std_msgs
-  catkin_virtualenv
-)
-
-## System dependencies are found with CMake's conventions
-# find_package(Boost REQUIRED COMPONENTS system)
-
-
-## Uncomment this if the package has a setup.py.
This macro ensures -## modules and global scripts declared therein get installed -## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html -catkin_python_setup() -catkin_generate_virtualenv( - INPUT_REQUIREMENTS requirements.in - PYTHON_INTERPRETER python3.10 -) - -################################################ -## Declare ROS messages, services and actions ## -################################################ - -## To declare and build messages, services or actions from within this -## package, follow these steps: -## * Let MSG_DEP_SET be the set of packages whose message types you use in -## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). -## * In the file package.xml: -## * add a build_depend tag for "message_generation" -## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET -## * If MSG_DEP_SET isn't empty the following dependency has been pulled in -## but can be declared for certainty nonetheless: -## * add a exec_depend tag for "message_runtime" -## * In this file (CMakeLists.txt): -## * add "message_generation" and every package in MSG_DEP_SET to -## find_package(catkin REQUIRED COMPONENTS ...) -## * add "message_runtime" and every package in MSG_DEP_SET to -## catkin_package(CATKIN_DEPENDS ...) -## * uncomment the add_*_files sections below as needed -## and list every .msg/.srv/.action file to be processed -## * uncomment the generate_messages entry below -## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) - -## Generate messages in the 'msg' folder -# add_message_files( -# FILES -# Message1.msg -# Message2.msg -# ) - -## Generate services in the 'srv' folder -# add_service_files( -# FILES -# PointingDirection.srv -# ) - -## Generate actions in the 'action' folder -# add_action_files( -# FILES -# Action1.action -# Action2.action -# ) - -## Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# geometry_msgs -# std_msgs -# sensor_msgs -# ) - -################################################ -## Declare ROS dynamic reconfigure parameters ## -################################################ - -## To declare and build dynamic reconfigure parameters within this -## package, follow these steps: -## * In the file package.xml: -## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" -## * In this file (CMakeLists.txt): -## * add "dynamic_reconfigure" to -## find_package(catkin REQUIRED COMPONENTS ...) 
-## * uncomment the "generate_dynamic_reconfigure_options" section below -## and list every .cfg file to be processed - -## Generate dynamic reconfigure parameters in the 'cfg' folder -# generate_dynamic_reconfigure_options( -# cfg/DynReconf1.cfg -# cfg/DynReconf2.cfg -# ) - -################################### -## catkin specific configuration ## -################################### -## The catkin_package macro generates cmake config files for your package -## Declare things to be passed to dependent projects -## INCLUDE_DIRS: uncomment this if your package contains header files -## LIBRARIES: libraries you create in this project that dependent projects also need -## CATKIN_DEPENDS: catkin_packages dependent projects also need -## DEPENDS: system dependencies of this project that dependent projects also need -catkin_package( -# INCLUDE_DIRS include -# LIBRARIES lasr_vision_pointing - CATKIN_DEPENDS geometry_msgs roscpp rospy std_msgs sensor_msgs -# DEPENDS system_lib -) - -########### -## Build ## -########### - -## Specify additional locations of header files -## Your package locations should be listed before other locations -include_directories( -# include - ${catkin_INCLUDE_DIRS} -) - -## Declare a C++ library -# add_library(${PROJECT_NAME} -# src/${PROJECT_NAME}/lasr_vision_pointing.cpp -# ) - -## Add cmake target dependencies of the library -## as an example, code may need to be generated before libraries -## either from message generation or dynamic reconfigure -# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Declare a C++ executable -## With catkin_make all packages are built within a single CMake context -## The recommended prefix ensures that target names across packages don't collide -# add_executable(${PROJECT_NAME}_node src/lasr_vision_pointing_node.cpp) - -## Rename C++ executable without prefix -## The above recommended prefix causes long target names, the following renames the -## target back to the shorter version for ease of user use -## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" -# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") - -## Add cmake target dependencies of the executable -## same as for the library above -# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Specify libraries to link a library or executable target against -# target_link_libraries(${PROJECT_NAME}_node -# ${catkin_LIBRARIES} -# ) - -############# -## Install ## -############# - -# all install targets should use catkin DESTINATION variables -# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html - -## Mark executable scripts (Python etc.) 
for installation -## in contrast to setup.py, you can choose the destination -catkin_install_python(PROGRAMS - scripts/detect_pointing.py - DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -) - -## Mark executables for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html -# install(TARGETS ${PROJECT_NAME}_node -# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -# ) - -## Mark libraries for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html -# install(TARGETS ${PROJECT_NAME} -# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} -# ) - -## Mark cpp header files for installation -# install(DIRECTORY include/${PROJECT_NAME}/ -# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} -# FILES_MATCHING PATTERN "*.h" -# PATTERN ".svn" EXCLUDE -# ) - -## Mark other files for installation (e.g. launch and bag files, etc.) -install(FILES - requirements.txt - DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} -) - -############# -## Testing ## -############# - -## Add gtest based cpp test target and link libraries -# catkin_add_gtest(${PROJECT_NAME}-test test/test_lasr_vision_pointing.cpp) -# if(TARGET ${PROJECT_NAME}-test) -# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) -# endif() - -## Add folders to be run by python nosetests -# catkin_add_nosetests(test) diff --git a/common/vision/lasr_vision_pointing/images/.gitkeep b/common/vision/lasr_vision_pointing/images/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/common/vision/lasr_vision_pointing/package.xml b/common/vision/lasr_vision_pointing/package.xml deleted file mode 100644 index d99ef49f8..000000000 --- a/common/vision/lasr_vision_pointing/package.xml +++ /dev/null @@ -1,77 +0,0 @@ - - - lasr_vision_pointing - 0.0.0 - The lasr_vision_pointing package - - - - - Serge Alkhalil - - - - - - TODO - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - catkin - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - geometry_msgs - roscpp - rospy - std_msgs - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - lasr_vision_msgs - - - catkin_virtualenv - - - - - requirements.txt - - - diff --git a/common/vision/lasr_vision_pointing/requirements.in b/common/vision/lasr_vision_pointing/requirements.in deleted file mode 100644 index c1b708709..000000000 --- a/common/vision/lasr_vision_pointing/requirements.in +++ /dev/null @@ -1 +0,0 @@ -mediapipe==0.10.10 \ No newline at end of file diff --git a/common/vision/lasr_vision_pointing/requirements.txt b/common/vision/lasr_vision_pointing/requirements.txt deleted file mode 100644 index e6ef834ab..000000000 --- a/common/vision/lasr_vision_pointing/requirements.txt +++ /dev/null @@ -1,24 +0,0 @@ -absl-py==2.1.0 # via mediapipe -attrs==23.2.0 # via mediapipe -cffi==1.16.0 # via sounddevice -contourpy==1.2.1 # via matplotlib -cycler==0.12.1 # via matplotlib -flatbuffers==24.3.25 # via mediapipe -fonttools==4.51.0 # via matplotlib -jax==0.4.26 # via mediapipe -kiwisolver==1.4.5 # via matplotlib -matplotlib==3.8.4 # via mediapipe -mediapipe==0.10.10 # via -r requirements.in -ml-dtypes==0.4.0 # via jax -numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy -opencv-contrib-python==4.9.0.80 # via mediapipe -opt-einsum==3.3.0 # via jax -packaging==24.0 # via matplotlib -pillow==10.3.0 # via 
matplotlib -protobuf==3.20.3 # via mediapipe -pycparser==2.22 # via cffi -pyparsing==3.1.2 # via matplotlib -python-dateutil==2.9.0.post0 # via matplotlib -scipy==1.13.0 # via jax -six==1.16.0 # via python-dateutil -sounddevice==0.4.6 # via mediapipe diff --git a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py b/common/vision/lasr_vision_pointing/scripts/detect_pointing.py deleted file mode 100644 index b56e0d14e..000000000 --- a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 - -import os -import cv2 -import rospkg -import rospy -import mediapipe as mp -from cv_bridge import CvBridge -from lasr_vision_msgs.srv import ( - YoloDetectionRequest, - YoloDetection, - PointingDirection, - PointingDirectionResponse, - PointingDirectionRequest, -) - - -class PointingDetector: - def __init__(self): - self.detect_service = rospy.ServiceProxy("/yolov8/detect", YoloDetection) - self.service = rospy.Service( - "/pointing_detection_service", PointingDirection, self.excute - ) - self.counter = 0 - - # Load MediaPipe Pose model - self.mp_pose = mp.solutions.pose - self.pose = self.mp_pose.Pose() - - def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse: - img = req.image_raw - resp = PointingDirectionResponse() - people_bounding_box = self.detection(img) - - if people_bounding_box: - for person in people_bounding_box: - keypoints = self.detect_keypoints( - img - ) # Detect keypoints using MediaPipe - direction = self.determine_pointing_direction( - keypoints, buffer_width=25 - ) - rospy.loginfo(f"Person detected pointing: {direction}") - - # Visualize pointing direction with landmarks - image_path = os.path.join( - rospkg.RosPack().get_path("lasr_vision_pointing"), - "images", - f"image{self.counter}.jpg", - ) - self.visualize_pointing_direction_with_landmarks( - image_path, person, direction, keypoints - ) - - resp.direction = direction - else: - resp.direction = "NONE" - - self.counter += 1 - return resp - - def store_image(self, img): - package_path = rospkg.RosPack().get_path("lasr_vision_pointing") - os.chdir(os.path.abspath(os.path.join(package_path, "images"))) - cv2.imwrite(f"image{self.counter}.jpg", img) - - def detect_keypoints(self, img): - img_bridge = CvBridge().imgmsg_to_cv2(img, desired_encoding="passthrough") - img_rgb = cv2.cvtColor(img_bridge, cv2.COLOR_BGR2RGB) - - # store the image - self.store_image(img_rgb) - - results = self.pose.process(img_rgb) - - keypoints = [] - if results.pose_landmarks: - for landmark in results.pose_landmarks.landmark: - x = int(landmark.x * img.width) - y = int(landmark.y * img.height) - keypoints.append((x, y)) - - return keypoints - - def detection(self, img): - request = YoloDetectionRequest() - request.image_raw = img # sensor_msgs/Image - request.dataset = "yolov8n-seg.pt" # YOLOv8 model, auto-downloads - request.confidence = 0.7 # minimum confidence to include in results - request.nms = 0.4 # non maximal supression - - # send request - response = self.detect_service(request) - - result = [] - for detection in response.detected_objects: - if detection.name == "person": - # cords of person in image - result.append(detection.xywh) - - return result - - def determine_pointing_direction(self, keypoints, buffer_width=50): - # Ensure keypoints are available - if ( - len(keypoints) >= 7 - ): # Ensure we have at least 7 keypoints for the upper body - # Extract relevant keypoints for shoulders and wrists - left_shoulder = 
keypoints[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value] - right_shoulder = keypoints[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value] - left_wrist = keypoints[self.mp_pose.PoseLandmark.LEFT_WRIST.value] - right_wrist = keypoints[self.mp_pose.PoseLandmark.RIGHT_WRIST.value] - - # Ensure all keypoints are detected - if left_shoulder and right_shoulder and left_wrist and right_wrist: - # Calculate the x-coordinate difference between shoulders and wrists - left_diff = left_wrist[0] - left_shoulder[0] - right_diff = right_shoulder[0] - right_wrist[0] - - # Determine pointing direction based on the difference in x-coordinates - if abs(left_diff - right_diff) < buffer_width: - return "FORWARDS" - elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff): - return "LEFT" if left_diff > 0 else "RIGHT" - elif abs(right_diff) > buffer_width and abs(right_diff) > abs( - left_diff - ): - return "RIGHT" if right_diff > 0 else "LEFT" - - # Default: Determine direction based on the relative position to the center of the image - return "NONE" - - def visualize_pointing_direction_with_landmarks( - self, image_path, person_bbox, pointing_direction, keypoints - ): - # Load image - img = cv2.imread(image_path) - - # Extract person bbox coordinates - x, y, w, h = person_bbox - # Calculate center of bbox - center_x = x + w // 2 - center_y = y + h // 2 - - # Calculate endpoint of arrow based on pointing direction - arrow_length = min(w, h) // 2 - if pointing_direction == "LEFT": - endpoint = (center_x - arrow_length, center_y) - elif pointing_direction == "RIGHT": - endpoint = (center_x + arrow_length, center_y) - elif pointing_direction == "FORWARDS": - endpoint = (center_x, center_y) - else: - return # No pointing direction detected - - # Draw arrow on image - color = (0, 255, 0) # Green color - thickness = 2 - cv2.arrowedLine(img, (center_x, center_y), endpoint, color, thickness) - - # Draw landmarks (keypoints) on the image - for keypoint in keypoints: - if keypoint: - cv2.circle(img, keypoint, 3, (0, 0, 255), -1) # Red color for landmarks - - # Store image with pointing direction visualization and landmarks - output_image_path = f"landmark_image{self.counter}.jpg" # Change the output image path as needed - cv2.imwrite(output_image_path, img) - - -if __name__ == "__main__": - rospy.init_node("pointing_detector") - pointer = PointingDetector() - rospy.loginfo("Pointing Detector is running") - rospy.spin() diff --git a/common/vision/lasr_vision_pointing/setup.py b/common/vision/lasr_vision_pointing/setup.py deleted file mode 100644 index e094797e8..000000000 --- a/common/vision/lasr_vision_pointing/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python3 - -from distutils.core import setup -from catkin_pkg.python_setup import generate_distutils_setup - -setup_args = generate_distutils_setup( - packages=["lasr_vision_pointing"], package_dir={"": "src"} -) - -setup(**setup_args) diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml index 302eb6083..2b98cc74e 100644 --- a/skills/config/motions.yaml +++ b/skills/config/motions.yaml @@ -39,4 +39,29 @@ play_motion: joints: [head_1_joint, head_2_joint] points: - positions: [0.0, 0.0] + time_from_start: 0.0 + raise_torso: + joints: [torso_lift_joint] + points: + - positions: [0.5] + time_from_start: 0.0 + pointing_to_the_right: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.70, -1.47, 0.38, 1.92, 1.66, 0.42, 0.08] + time_from_start: 0.0 + 
pointing_to_the_left: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.32, -1.42, -0.39, 2.05, 1.62, 0.56, 0.09] + time_from_start: 0.0 + raising_right_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.12, 0.09, -3.16, 1.65, 1.53, 0.00, 0.13] + time_from_start: 0.0 + raising_left_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13] time_from_start: 0.0 \ No newline at end of file diff --git a/skills/src/lasr_skills/__init__.py b/skills/src/lasr_skills/__init__.py index 736ee4823..4bceb19ca 100755 --- a/skills/src/lasr_skills/__init__.py +++ b/skills/src/lasr_skills/__init__.py @@ -21,3 +21,4 @@ from .recognise import Recognise from .detect_pointing import DetectPointingDirection from .detect_gesture import DetectGesture +from .find_gesture_person import FindGesturePerson diff --git a/skills/src/lasr_skills/detect_gesture.py b/skills/src/lasr_skills/detect_gesture.py index 5ac565366..28d4f84cb 100755 --- a/skills/src/lasr_skills/detect_gesture.py +++ b/skills/src/lasr_skills/detect_gesture.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 +from typing import Optional import smach import rospy +import cv2 +import cv2_img from lasr_skills.vision import GetImage +from lasr_skills import PlayMotion from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest from lasr_vision_msgs.msg import BodyPixMaskRequest +from sensor_msgs.msg import Image class DetectGesture(smach.State): @@ -11,16 +16,25 @@ class DetectGesture(smach.State): State for detecting gestures. """ - def __init__(self, gesture_to_detect: str = "raising_left_hand"): - self.gesture_to_detect = gesture_to_detect + def __init__( + self, + gesture_to_detect: Optional[str] = None, + buffer_width: int = 50, + ): + """Optionally stores the gesture to detect. 
If None, it will infer the gesture from the keypoints."""
         smach.State.__init__(
             self,
             outcomes=["succeeded", "failed"],
             input_keys=["img_msg"],
             output_keys=["gesture_detected"],
         )
-
+        self.gesture_to_detect = gesture_to_detect
         self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)
+        self.debug_publisher = rospy.Publisher(
+            "/gesture_detection/debug", Image, queue_size=1
+        )
+        self.buffer_width = buffer_width
 
     def execute(self, userdata):
 
@@ -54,29 +68,67 @@ def execute(self, userdata):
                     "y": keypoint.xy[1],
                     "score": keypoint.score,
                 }
-
-        if self.gesture_to_detect == "raising_left_hand":
+        if (
+            self.gesture_to_detect == "raising_left_arm"
+            or self.gesture_to_detect is None
+        ):
             if part_info["leftWrist"]["y"] < part_info["leftShoulder"]["y"]:
-                userdata.gesture_detected = True
-            else:
-                userdata.gesture_detected = False
-        elif self.gesture_to_detect == "raising_right_hand":
+                self.gesture_to_detect = "raising_left_arm"
+        if (
+            self.gesture_to_detect == "raising_right_arm"
+            or self.gesture_to_detect is None
+        ):
             if part_info["rightWrist"]["y"] < part_info["rightShoulder"]["y"]:
-                userdata.gesture_detected = True
-            else:
-                userdata.gesture_detected = False
-        else:
-            raise ValueError("Invalid gesture to detect")
+                self.gesture_to_detect = "raising_right_arm"
+        if (
+            self.gesture_to_detect == "pointing_to_the_left"
+            or self.gesture_to_detect is None
+        ):
+            if (
+                part_info["leftWrist"]["x"] - self.buffer_width
+                > part_info["leftShoulder"]["x"]
+            ):
+                self.gesture_to_detect = "pointing_to_the_left"
+        if (
+            self.gesture_to_detect == "pointing_to_the_right"
+            or self.gesture_to_detect is None
+        ):
+            if (
+                part_info["rightShoulder"]["x"] - self.buffer_width
+                > part_info["rightWrist"]["x"]
+            ):
+                self.gesture_to_detect = "pointing_to_the_right"
+
+        if self.gesture_to_detect is None:
+            self.gesture_to_detect = "none"
+
+        userdata.gesture_detected = self.gesture_to_detect
+
+        cv2_gesture_img = cv2_img.msg_to_cv2_img(userdata.img_msg)
+        # Add text to the image
+        cv2.putText(
+            cv2_gesture_img,
+            self.gesture_to_detect,
+            (10, 30),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1,
+            (0, 255, 0),
+            2,
+            cv2.LINE_AA,
+        )
+        # Publish the image
+        self.debug_publisher.publish(cv2_img.cv2_img_to_msg(cv2_gesture_img))
 
         return "succeeded"
 
 
+### For example usage:
 class GestureDetectionSM(smach.StateMachine):
     """
     State machine for detecting gestures.
""" - def __init__(self, gesture_to_detect: str = "raising_left_hand"): + def __init__(self, gesture_to_detect: Optional[str] = None): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) self.gesture_to_detect = gesture_to_detect with self: @@ -95,9 +147,22 @@ def __init__(self, gesture_to_detect: str = "raising_left_hand"): if __name__ == "__main__": rospy.init_node("gesture_detection_sm") + ### Example usage: while not rospy.is_shutdown(): sm = GestureDetectionSM() sm.execute() - print("Raising Left Hand Detected:", sm.userdata.gesture_detected) + gesture_state = PlayMotion(motion_name=sm.userdata.gesture_detected) + gesture_sm = smach.StateMachine(outcomes=["succeeded", "failed"]) + with gesture_sm: + smach.StateMachine.add( + "GESTURE_STATE", + gesture_state, + transitions={ + "succeeded": "succeeded", + "aborted": "failed", + "preempted": "failed", + }, + ) + gesture_sm.execute() rospy.spin() diff --git a/skills/src/lasr_skills/detect_pointing.py b/skills/src/lasr_skills/detect_pointing.py deleted file mode 100644 index 21895dfc1..000000000 --- a/skills/src/lasr_skills/detect_pointing.py +++ /dev/null @@ -1,26 +0,0 @@ -import rospy -import smach -from sensor_msgs.msg import Image - -from lasr_vision_msgs.srv import PointingDirection - - -class DetectPointingDirection(smach.State): - def __init__(self, image_topic: str = "/xtion/rgb/image_raw"): - smach.State.__init__( - self, outcomes=["succeeded", "failed"], output_keys=["pointing_direction"] - ) - self._image_topic = image_topic - self._pointing_service = rospy.ServiceProxy( - "/pointing_detection_service", PointingDirection - ) - self._pointing_service.wait_for_service() - - def execute(self, userdata): - - img_msg = rospy.wait_for_message(self._image_topic, Image) - resp = self._pointing_service(img_msg) - if resp.direction == "NONE" or resp.direction == "FORWARDS": - return "failed" - userdata.pointing_direction = resp.direction - return "succeeded" diff --git a/skills/src/lasr_skills/find_gesture_person.py b/skills/src/lasr_skills/find_gesture_person.py new file mode 100644 index 000000000..ae195494a --- /dev/null +++ b/skills/src/lasr_skills/find_gesture_person.py @@ -0,0 +1,221 @@ +import smach +import rospy + +from lasr_skills import Detect3D, GoToLocation, DetectGesture, PlayMotion +from lasr_skills.vision import GetImage +import navigation_helpers + +from geometry_msgs.msg import Pose, PoseWithCovarianceStamped, Point, Quaternion + + +from typing import List, Union + + +class FindGesturePerson(smach.StateMachine): + class GetLocation(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["location_index", "waypoints"], + output_keys=["location"], + ) + + def execute(self, userdata): + userdata.location = userdata.waypoints[userdata.location_index] + return "succeeded" + + class GetPose(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + output_keys=["current_pose"], + ) + + def execute(self, userdata): + userdata.current_pose = rospy.wait_for_message( + "/amcl_pose", PoseWithCovarianceStamped + ).pose.pose + return "succeeded" + + class ComputePath(smach.State): + def __init__(self, waypoints: List[Pose]): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["current_pose"], + output_keys=["waypoints"], + ) + self._waypoints = waypoints + + def execute(self, userdata): + userdata.waypoints = navigation_helpers.min_hamiltonian_path( + userdata.current_pose, 
self._waypoints
+            )
+            return "succeeded"
+
+    class HandleDetections(smach.State):
+        def __init__(self):
+            smach.State.__init__(
+                self,
+                outcomes=["succeeded", "failed"],
+                input_keys=["detections_3d"],
+                output_keys=["person_point"],
+            )
+
+        def execute(self, userdata):
+            if len(userdata.detections_3d.detected_objects) == 0:
+                rospy.logwarn("No person detected, returning failed.")
+                return "failed"
+            userdata.person_point = userdata.detections_3d.detected_objects[0].point
+            return "succeeded"
+
+    class HandleResponse(smach.State):
+        def __init__(self):
+            smach.State.__init__(
+                self,
+                outcomes=["succeeded", "failed"],
+                input_keys=["gesture_detected"],
+            )
+
+        def execute(self, userdata):
+            if userdata.gesture_detected:
+                return "succeeded"
+            return "failed"
+
+    def __init__(
+        self,
+        gesture: str,
+        waypoints: Union[List[Pose], None] = None,
+        location_param: Union[str, None] = None,
+    ):
+        smach.StateMachine.__init__(
+            self, outcomes=["succeeded", "failed"], output_keys=["person_point"]
+        )
+        if waypoints is None and location_param is None:
+            raise ValueError("Either waypoints or location_param must be provided")
+
+        if gesture == "person raising their left arm":
+            self.gesture = "raising_left_arm"
+        elif gesture == "person raising their right arm":
+            self.gesture = "raising_right_arm"
+        elif gesture == "person pointing to the right":
+            self.gesture = "pointing_to_the_right"
+        elif gesture == "person pointing to the left":
+            self.gesture = "pointing_to_the_left"
+        else:
+            raise ValueError(f"Gesture {gesture} not recognized")
+
+        if waypoints is None:
+            waypoints_to_iterate: List[Pose] = []
+
+            room = rospy.get_param(location_param)
+            beacons = room["beacons"]
+            for beacon in beacons:
+                waypoint: Pose = Pose(
+                    position=Point(**beacons[beacon]["near_pose"]["position"]),
+                    orientation=Quaternion(
+                        **beacons[beacon]["near_pose"]["orientation"]
+                    ),
+                )
+                waypoints_to_iterate.append(waypoint)
+        else:
+            waypoints_to_iterate: List[Pose] = waypoints
+
+        with self:
+            smach.StateMachine.add(
+                "GET_POSE",
+                self.GetPose(),
+                transitions={"succeeded": "COMPUTE_PATH", "failed": "failed"},
+            )
+
+            smach.StateMachine.add(
+                "COMPUTE_PATH",
+                self.ComputePath(waypoints_to_iterate),
+                transitions={"succeeded": "WAYPOINT_ITERATOR", "failed": "failed"},
+            )
+
+            waypoint_iterator = smach.Iterator(
+                outcomes=["succeeded", "failed"],
+                it=lambda: range(len(waypoints_to_iterate)),
+                it_label="location_index",
+                input_keys=["waypoints"],
+                output_keys=["person_point"],
+                exhausted_outcome="failed",
+            )
+
+            with waypoint_iterator:
+                container_sm = smach.StateMachine(
+                    outcomes=["succeeded", "failed", "continue"],
+                    input_keys=["location_index", "waypoints"],
+                    output_keys=["person_point"],
+                )
+
+                with container_sm:
+                    smach.StateMachine.add(
+                        "GET_LOCATION",
+                        self.GetLocation(),
+                        transitions={"succeeded": "GO_TO_LOCATION", "failed": "failed"},
+                    )
+
+                    smach.StateMachine.add(
+                        "GO_TO_LOCATION",
+                        GoToLocation(),
+                        transitions={
+                            "succeeded": "DETECT3D",
+                            "failed": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "DETECT3D",
+                        Detect3D(filter=["person"]),
+                        transitions={
+                            "succeeded": "HANDLE_DETECTIONS",
+                            "failed": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "HANDLE_DETECTIONS",
+                        self.HandleDetections(),
+                        transitions={
+                            "succeeded": "PLAY_MOTION",
+                            "failed": "continue",
+                        },
+                    )
+                    smach.StateMachine.add(
+                        "PLAY_MOTION",
+                        PlayMotion(motion_name="raise_torso"),
+                        transitions={
+                            "succeeded": "GET_IMAGE",
+                            "aborted": "failed",
+                            "preempted": "failed",
+                        },
+                    )
+                    smach.StateMachine.add(
+ "GET_IMAGE", + GetImage(), + transitions={"succeeded": "CHECK_GESTURE", "failed": "failed"}, + ) + smach.StateMachine.add( + "CHECK_GESTURE", + DetectGesture(self.gesture), + transitions={ + "succeeded": "HANDLE_RESPONSE", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "HANDLE_RESPONSE", + self.HandleResponse(), + transitions={"succeeded": "succeeded", "failed": "continue"}, + ) + + waypoint_iterator.set_contained_state( + "CONTAINER_STATE", container_sm, loop_outcomes=["continue"] + ) + smach.StateMachine.add( + "WAYPOINT_ITERATOR", + waypoint_iterator, + {"succeeded": "succeeded", "failed": "failed"}, + ) diff --git a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py index 39cd67ce5..00c743c20 100644 --- a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py +++ b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py @@ -4,7 +4,7 @@ WaitForPerson, DetectPointingDirection, Say, - PlayMotion, + DetectGesture, ReceiveObject, ) @@ -13,6 +13,24 @@ class CarryMyLuggage(smach.StateMachine): + class ProcessPointingDirection(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["gesture_detected"], + output_keys=["pointing_direction"], + ) + + def execute(self, userdata): + if userdata.gesture_detected == "pointing_to_the_left": + userdata.pointing_direction = "left" + elif userdata.gesture_detected == "pointing_to_the_right": + userdata.pointing_direction = "right" + else: + return "failed" + return "succeeded" + def __init__(self): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) @@ -28,7 +46,16 @@ def __init__(self): smach.StateMachine.add( "DETECT_POINTING_DIRECTION", - DetectPointingDirection(), + DetectGesture(), + transitions={ + "succeeded": "PROCESS_POINTING_DIRECTION", + "failed": "SAY_FAILED_POINTING", + }, + ) + + smach.StateMachine.add( + "PROCESS_POINTING_DIRECTION", + CarryMyLuggage.ProcessPointingDirection(), transitions={ "succeeded": "SAY_BAG", "failed": "SAY_FAILED_POINTING", diff --git a/tasks/gpsr/config/lab.yaml b/tasks/gpsr/config/lab.yaml index 96cd8cd52..f62e2c4c8 100644 --- a/tasks/gpsr/config/lab.yaml +++ b/tasks/gpsr/config/lab.yaml @@ -9,8 +9,8 @@ arena: beacons: tv_desk: near_pose: - position: {x: 0.96, y: 0.82, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: 0.76, w: 0.65} + position: {x: 0.989603867147269, y: 0.47950265241289325, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: 0.738945375913055, w: 0.6737653385391044} near_polygon: [[-0.11, 0.00], [-0.08, 2.36], [1.84, 2.75], [2.02, 1.31]] search_pose: position: {x: 0.97, y: 1.60, z: 0.0} @@ -18,8 +18,8 @@ arena: search_polygon: [[0.40, 1.92], [0.31, 2.45], [1.36, 2.70], [1.456, 2.17]] window_desk: near_pose: - position: {x: 2.38, y: -3.0, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: -0.62, w: 0.79} + position: {x: 2.6771759491107954, y: -2.2998999761206105, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: -0.6148810103509047, w: 0.7886198977389871} near_polygon: [[3.34, -3.27], [3.75, -4.80], [2.47, -5.20], [1.75, -3.58]] search_pose: position: {x: 2.47, y: -3.73, z: 0.0} diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py index 0208686dd..2e005919b 100644 --- a/tasks/gpsr/src/gpsr/state_machine_factory.py +++ b/tasks/gpsr/src/gpsr/state_machine_factory.py @@ -2,7 +2,7 @@ import rospy import smach from typing import Dict, List -from lasr_skills import GoToLocation, FindNamedPerson # type: ignore 
+from lasr_skills import GoToLocation, FindNamedPerson, FindGesturePerson # type: ignore from gpsr.states import Talk @@ -43,11 +43,25 @@ def _handle_find_command(self, command_param: Dict): def _handle_talk_command(self, command_param: Dict): if "gesture" in command_param: - raise NotImplementedError("Talk command with gesture not implemented") - elif "text" in command_param: + if not "location" in command_param: + raise ValueError( + "Talk command with gesture but no room in command parameters" + ) + location_param_room = f"/gpsr/arena/rooms/{command_param['location']}" + self.sm.add( + f"STATE_{self._increment_state_count()}", + FindGesturePerson( + location_param=location_param_room, gesture=command_param["gesture"] + ), + transitions={ + "succeeded": f"STATE_{self.state_count + 1}", + "failed": "failed", + }, + ) + if "talk" in command_param: self.sm.add( f"STATE_{self._increment_state_count()}", - Talk(text=command_param["text"]), + Talk(talk_phrase=command_param["talk"]), transitions={ "succeeded": f"STATE_{self.state_count + 1}", "failed": "failed", @@ -143,8 +157,8 @@ def build_state_machine(self) -> smach.StateMachine: raise ValueError(f"Invalid command verb: {command_verb}") self.sm.add( - f"STATE_{self.state_count}", - Talk(text="I have completed the task."), + f"STATE_{self.state_count+1}", + Talk(talk_phrase="I have completed the task."), transitions={"succeeded": "succeeded", "failed": "failed"}, )
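For reference, the keypoint rules that the new DetectGesture skill applies in skills/src/lasr_skills/detect_gesture.py above reduce to four comparisons on BodyPix keypoints. The sketch below restates them as a standalone, ROS-free function so the thresholds are easy to see; classify_gesture and its sample input are hypothetical names used only for illustration, not part of the patch. Coordinates are image pixels, so a smaller y means higher in the frame.

from typing import Dict, Optional, Tuple

Point = Tuple[float, float]  # (x, y) in image pixels; y grows downwards


def classify_gesture(
    keypoints: Dict[str, Point], buffer_width: int = 50
) -> Optional[str]:
    """Return one of the motion names added to skills/config/motions.yaml,
    or None when no gesture is recognised."""
    # Raised arm: the wrist keypoint sits above the shoulder (smaller y).
    if keypoints["leftWrist"][1] < keypoints["leftShoulder"][1]:
        return "raising_left_arm"
    if keypoints["rightWrist"][1] < keypoints["rightShoulder"][1]:
        return "raising_right_arm"
    # Pointing: the wrist is displaced sideways past the shoulder by more than
    # buffer_width pixels, mirroring the checks in DetectGesture.execute.
    if keypoints["leftWrist"][0] - buffer_width > keypoints["leftShoulder"][0]:
        return "pointing_to_the_left"
    if keypoints["rightShoulder"][0] - buffer_width > keypoints["rightWrist"][0]:
        return "pointing_to_the_right"
    return None


if __name__ == "__main__":
    # Left wrist well above the left shoulder -> raised left arm.
    sample = {
        "leftWrist": (310.0, 120.0),
        "leftShoulder": (300.0, 240.0),
        "rightWrist": (200.0, 300.0),
        "rightShoulder": (220.0, 240.0),
    }
    print(classify_gesture(sample))  # prints: raising_left_arm

As in the skill, the checks run in a fixed priority order (raised arms before pointing), and the returned name matches the motions added in skills/config/motions.yaml, so it can be passed straight to PlayMotion to mimic the detected gesture.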