From 953d35c7851b6117aa5b77fa26fb8df9fc26e8c0 Mon Sep 17 00:00:00 2001 From: Matt Barker <105945282+m-barker@users.noreply.github.com> Date: Tue, 14 May 2024 10:47:43 +0100 Subject: [PATCH 1/3] Gesture detection (#202) * fix: incorrect remapping keys for handover * fix: remove speech server in launch file * feat: add find gesture person skill, with support for raisied arms * feat: add pointing to gesture detection * feat: add debug publisher to gesture detection * feat: add mimicing of gesture detected by Tiago * feat: update CML with new pointing API * feat: update GPSR API with new gesture detection skill * chore: remove old pointing skill & service * feat: make debug always be published --- .../lasr_vision_pointing/CMakeLists.txt | 213 ----------------- .../lasr_vision_pointing/images/.gitkeep | 0 .../vision/lasr_vision_pointing/package.xml | 77 ------ .../lasr_vision_pointing/requirements.in | 1 - .../lasr_vision_pointing/requirements.txt | 24 -- .../scripts/detect_pointing.py | 175 -------------- common/vision/lasr_vision_pointing/setup.py | 10 - skills/config/motions.yaml | 25 ++ skills/src/lasr_skills/__init__.py | 1 + skills/src/lasr_skills/detect_gesture.py | 97 ++++++-- skills/src/lasr_skills/detect_pointing.py | 26 --- skills/src/lasr_skills/find_gesture_person.py | 221 ++++++++++++++++++ .../src/carry_my_luggage/state_machine.py | 31 ++- tasks/gpsr/config/lab.yaml | 8 +- tasks/gpsr/src/gpsr/state_machine_factory.py | 26 ++- 15 files changed, 381 insertions(+), 554 deletions(-) delete mode 100644 common/vision/lasr_vision_pointing/CMakeLists.txt delete mode 100644 common/vision/lasr_vision_pointing/images/.gitkeep delete mode 100644 common/vision/lasr_vision_pointing/package.xml delete mode 100644 common/vision/lasr_vision_pointing/requirements.in delete mode 100644 common/vision/lasr_vision_pointing/requirements.txt delete mode 100644 common/vision/lasr_vision_pointing/scripts/detect_pointing.py delete mode 100644 common/vision/lasr_vision_pointing/setup.py delete mode 100644 skills/src/lasr_skills/detect_pointing.py create mode 100644 skills/src/lasr_skills/find_gesture_person.py diff --git a/common/vision/lasr_vision_pointing/CMakeLists.txt b/common/vision/lasr_vision_pointing/CMakeLists.txt deleted file mode 100644 index 66f1cf831..000000000 --- a/common/vision/lasr_vision_pointing/CMakeLists.txt +++ /dev/null @@ -1,213 +0,0 @@ -cmake_minimum_required(VERSION 3.0.2) -project(lasr_vision_pointing) - -## Compile as C++11, supported in ROS Kinetic and newer -# add_compile_options(-std=c++11) - -## Find catkin macros and libraries -## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) -## is used, also find other catkin packages -find_package(catkin REQUIRED COMPONENTS - geometry_msgs - sensor_msgs - roscpp - rospy - std_msgs - catkin_virtualenv -) - -## System dependencies are found with CMake's conventions -# find_package(Boost REQUIRED COMPONENTS system) - - -## Uncomment this if the package has a setup.py. This macro ensures -## modules and global scripts declared therein get installed -## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html -catkin_python_setup() -catkin_generate_virtualenv( - INPUT_REQUIREMENTS requirements.in - PYTHON_INTERPRETER python3.10 -) - -################################################ -## Declare ROS messages, services and actions ## -################################################ - -## To declare and build messages, services or actions from within this -## package, follow these steps: -## * Let MSG_DEP_SET be the set of packages whose message types you use in -## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). -## * In the file package.xml: -## * add a build_depend tag for "message_generation" -## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET -## * If MSG_DEP_SET isn't empty the following dependency has been pulled in -## but can be declared for certainty nonetheless: -## * add a exec_depend tag for "message_runtime" -## * In this file (CMakeLists.txt): -## * add "message_generation" and every package in MSG_DEP_SET to -## find_package(catkin REQUIRED COMPONENTS ...) -## * add "message_runtime" and every package in MSG_DEP_SET to -## catkin_package(CATKIN_DEPENDS ...) -## * uncomment the add_*_files sections below as needed -## and list every .msg/.srv/.action file to be processed -## * uncomment the generate_messages entry below -## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) - -## Generate messages in the 'msg' folder -# add_message_files( -# FILES -# Message1.msg -# Message2.msg -# ) - -## Generate services in the 'srv' folder -# add_service_files( -# FILES -# PointingDirection.srv -# ) - -## Generate actions in the 'action' folder -# add_action_files( -# FILES -# Action1.action -# Action2.action -# ) - -## Generate added messages and services with any dependencies listed here -# generate_messages( -# DEPENDENCIES -# geometry_msgs -# std_msgs -# sensor_msgs -# ) - -################################################ -## Declare ROS dynamic reconfigure parameters ## -################################################ - -## To declare and build dynamic reconfigure parameters within this -## package, follow these steps: -## * In the file package.xml: -## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" -## * In this file (CMakeLists.txt): -## * add "dynamic_reconfigure" to -## find_package(catkin REQUIRED COMPONENTS ...) -## * uncomment the "generate_dynamic_reconfigure_options" section below -## and list every .cfg file to be processed - -## Generate dynamic reconfigure parameters in the 'cfg' folder -# generate_dynamic_reconfigure_options( -# cfg/DynReconf1.cfg -# cfg/DynReconf2.cfg -# ) - -################################### -## catkin specific configuration ## -################################### -## The catkin_package macro generates cmake config files for your package -## Declare things to be passed to dependent projects -## INCLUDE_DIRS: uncomment this if your package contains header files -## LIBRARIES: libraries you create in this project that dependent projects also need -## CATKIN_DEPENDS: catkin_packages dependent projects also need -## DEPENDS: system dependencies of this project that dependent projects also need -catkin_package( -# INCLUDE_DIRS include -# LIBRARIES lasr_vision_pointing - CATKIN_DEPENDS geometry_msgs roscpp rospy std_msgs sensor_msgs -# DEPENDS system_lib -) - -########### -## Build ## -########### - -## Specify additional locations of header files -## Your package locations should be listed before other locations -include_directories( -# include - ${catkin_INCLUDE_DIRS} -) - -## Declare a C++ library -# add_library(${PROJECT_NAME} -# src/${PROJECT_NAME}/lasr_vision_pointing.cpp -# ) - -## Add cmake target dependencies of the library -## as an example, code may need to be generated before libraries -## either from message generation or dynamic reconfigure -# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Declare a C++ executable -## With catkin_make all packages are built within a single CMake context -## The recommended prefix ensures that target names across packages don't collide -# add_executable(${PROJECT_NAME}_node src/lasr_vision_pointing_node.cpp) - -## Rename C++ executable without prefix -## The above recommended prefix causes long target names, the following renames the -## target back to the shorter version for ease of user use -## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" -# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") - -## Add cmake target dependencies of the executable -## same as for the library above -# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) - -## Specify libraries to link a library or executable target against -# target_link_libraries(${PROJECT_NAME}_node -# ${catkin_LIBRARIES} -# ) - -############# -## Install ## -############# - -# all install targets should use catkin DESTINATION variables -# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html - -## Mark executable scripts (Python etc.) for installation -## in contrast to setup.py, you can choose the destination -catkin_install_python(PROGRAMS - scripts/detect_pointing.py - DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -) - -## Mark executables for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html -# install(TARGETS ${PROJECT_NAME}_node -# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} -# ) - -## Mark libraries for installation -## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html -# install(TARGETS ${PROJECT_NAME} -# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} -# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} -# ) - -## Mark cpp header files for installation -# install(DIRECTORY include/${PROJECT_NAME}/ -# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} -# FILES_MATCHING PATTERN "*.h" -# PATTERN ".svn" EXCLUDE -# ) - -## Mark other files for installation (e.g. launch and bag files, etc.) -install(FILES - requirements.txt - DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} -) - -############# -## Testing ## -############# - -## Add gtest based cpp test target and link libraries -# catkin_add_gtest(${PROJECT_NAME}-test test/test_lasr_vision_pointing.cpp) -# if(TARGET ${PROJECT_NAME}-test) -# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) -# endif() - -## Add folders to be run by python nosetests -# catkin_add_nosetests(test) diff --git a/common/vision/lasr_vision_pointing/images/.gitkeep b/common/vision/lasr_vision_pointing/images/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/common/vision/lasr_vision_pointing/package.xml b/common/vision/lasr_vision_pointing/package.xml deleted file mode 100644 index d99ef49f8..000000000 --- a/common/vision/lasr_vision_pointing/package.xml +++ /dev/null @@ -1,77 +0,0 @@ - - - lasr_vision_pointing - 0.0.0 - The lasr_vision_pointing package - - - - - Serge Alkhalil - - - - - - TODO - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - catkin - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - geometry_msgs - roscpp - rospy - std_msgs - geometry_msgs - roscpp - rospy - std_msgs - sensor_msgs - lasr_vision_msgs - - - catkin_virtualenv - - - - - requirements.txt - - - diff --git a/common/vision/lasr_vision_pointing/requirements.in b/common/vision/lasr_vision_pointing/requirements.in deleted file mode 100644 index c1b708709..000000000 --- a/common/vision/lasr_vision_pointing/requirements.in +++ /dev/null @@ -1 +0,0 @@ -mediapipe==0.10.10 \ No newline at end of file diff --git a/common/vision/lasr_vision_pointing/requirements.txt b/common/vision/lasr_vision_pointing/requirements.txt deleted file mode 100644 index e6ef834ab..000000000 --- a/common/vision/lasr_vision_pointing/requirements.txt +++ /dev/null @@ -1,24 +0,0 @@ -absl-py==2.1.0 # via mediapipe -attrs==23.2.0 # via mediapipe -cffi==1.16.0 # via sounddevice -contourpy==1.2.1 # via matplotlib -cycler==0.12.1 # via matplotlib -flatbuffers==24.3.25 # via mediapipe -fonttools==4.51.0 # via matplotlib -jax==0.4.26 # via mediapipe -kiwisolver==1.4.5 # via matplotlib -matplotlib==3.8.4 # via mediapipe -mediapipe==0.10.10 # via -r requirements.in -ml-dtypes==0.4.0 # via jax -numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy -opencv-contrib-python==4.9.0.80 # via mediapipe -opt-einsum==3.3.0 # via jax -packaging==24.0 # via matplotlib -pillow==10.3.0 # via matplotlib -protobuf==3.20.3 # via mediapipe -pycparser==2.22 # via cffi -pyparsing==3.1.2 # via matplotlib -python-dateutil==2.9.0.post0 # via matplotlib -scipy==1.13.0 # via jax -six==1.16.0 # via python-dateutil -sounddevice==0.4.6 # via mediapipe diff --git a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py b/common/vision/lasr_vision_pointing/scripts/detect_pointing.py deleted file mode 100644 index b56e0d14e..000000000 --- a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 - -import os -import cv2 -import rospkg -import rospy -import mediapipe as mp -from cv_bridge import CvBridge -from lasr_vision_msgs.srv import ( - YoloDetectionRequest, - YoloDetection, - PointingDirection, - PointingDirectionResponse, - PointingDirectionRequest, -) - - -class PointingDetector: - def __init__(self): - self.detect_service = rospy.ServiceProxy("/yolov8/detect", YoloDetection) - self.service = rospy.Service( - "/pointing_detection_service", PointingDirection, self.excute - ) - self.counter = 0 - - # Load MediaPipe Pose model - self.mp_pose = mp.solutions.pose - self.pose = self.mp_pose.Pose() - - def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse: - img = req.image_raw - resp = PointingDirectionResponse() - people_bounding_box = self.detection(img) - - if people_bounding_box: - for person in people_bounding_box: - keypoints = self.detect_keypoints( - img - ) # Detect keypoints using MediaPipe - direction = self.determine_pointing_direction( - keypoints, buffer_width=25 - ) - rospy.loginfo(f"Person detected pointing: {direction}") - - # Visualize pointing direction with landmarks - image_path = os.path.join( - rospkg.RosPack().get_path("lasr_vision_pointing"), - "images", - f"image{self.counter}.jpg", - ) - self.visualize_pointing_direction_with_landmarks( - image_path, person, direction, keypoints - ) - - resp.direction = direction - else: - resp.direction = "NONE" - - self.counter += 1 - return resp - - def store_image(self, img): - package_path = rospkg.RosPack().get_path("lasr_vision_pointing") - os.chdir(os.path.abspath(os.path.join(package_path, "images"))) - cv2.imwrite(f"image{self.counter}.jpg", img) - - def detect_keypoints(self, img): - img_bridge = CvBridge().imgmsg_to_cv2(img, desired_encoding="passthrough") - img_rgb = cv2.cvtColor(img_bridge, cv2.COLOR_BGR2RGB) - - # store the image - self.store_image(img_rgb) - - results = self.pose.process(img_rgb) - - keypoints = [] - if results.pose_landmarks: - for landmark in results.pose_landmarks.landmark: - x = int(landmark.x * img.width) - y = int(landmark.y * img.height) - keypoints.append((x, y)) - - return keypoints - - def detection(self, img): - request = YoloDetectionRequest() - request.image_raw = img # sensor_msgs/Image - request.dataset = "yolov8n-seg.pt" # YOLOv8 model, auto-downloads - request.confidence = 0.7 # minimum confidence to include in results - request.nms = 0.4 # non maximal supression - - # send request - response = self.detect_service(request) - - result = [] - for detection in response.detected_objects: - if detection.name == "person": - # cords of person in image - result.append(detection.xywh) - - return result - - def determine_pointing_direction(self, keypoints, buffer_width=50): - # Ensure keypoints are available - if ( - len(keypoints) >= 7 - ): # Ensure we have at least 7 keypoints for the upper body - # Extract relevant keypoints for shoulders and wrists - left_shoulder = keypoints[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value] - right_shoulder = keypoints[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value] - left_wrist = keypoints[self.mp_pose.PoseLandmark.LEFT_WRIST.value] - right_wrist = keypoints[self.mp_pose.PoseLandmark.RIGHT_WRIST.value] - - # Ensure all keypoints are detected - if left_shoulder and right_shoulder and left_wrist and right_wrist: - # Calculate the x-coordinate difference between shoulders and wrists - left_diff = left_wrist[0] - left_shoulder[0] - right_diff = right_shoulder[0] - right_wrist[0] - - # Determine pointing direction based on the difference in x-coordinates - if abs(left_diff - right_diff) < buffer_width: - return "FORWARDS" - elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff): - return "LEFT" if left_diff > 0 else "RIGHT" - elif abs(right_diff) > buffer_width and abs(right_diff) > abs( - left_diff - ): - return "RIGHT" if right_diff > 0 else "LEFT" - - # Default: Determine direction based on the relative position to the center of the image - return "NONE" - - def visualize_pointing_direction_with_landmarks( - self, image_path, person_bbox, pointing_direction, keypoints - ): - # Load image - img = cv2.imread(image_path) - - # Extract person bbox coordinates - x, y, w, h = person_bbox - # Calculate center of bbox - center_x = x + w // 2 - center_y = y + h // 2 - - # Calculate endpoint of arrow based on pointing direction - arrow_length = min(w, h) // 2 - if pointing_direction == "LEFT": - endpoint = (center_x - arrow_length, center_y) - elif pointing_direction == "RIGHT": - endpoint = (center_x + arrow_length, center_y) - elif pointing_direction == "FORWARDS": - endpoint = (center_x, center_y) - else: - return # No pointing direction detected - - # Draw arrow on image - color = (0, 255, 0) # Green color - thickness = 2 - cv2.arrowedLine(img, (center_x, center_y), endpoint, color, thickness) - - # Draw landmarks (keypoints) on the image - for keypoint in keypoints: - if keypoint: - cv2.circle(img, keypoint, 3, (0, 0, 255), -1) # Red color for landmarks - - # Store image with pointing direction visualization and landmarks - output_image_path = f"landmark_image{self.counter}.jpg" # Change the output image path as needed - cv2.imwrite(output_image_path, img) - - -if __name__ == "__main__": - rospy.init_node("pointing_detector") - pointer = PointingDetector() - rospy.loginfo("Pointing Detector is running") - rospy.spin() diff --git a/common/vision/lasr_vision_pointing/setup.py b/common/vision/lasr_vision_pointing/setup.py deleted file mode 100644 index e094797e8..000000000 --- a/common/vision/lasr_vision_pointing/setup.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python3 - -from distutils.core import setup -from catkin_pkg.python_setup import generate_distutils_setup - -setup_args = generate_distutils_setup( - packages=["lasr_vision_pointing"], package_dir={"": "src"} -) - -setup(**setup_args) diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml index 302eb6083..2b98cc74e 100644 --- a/skills/config/motions.yaml +++ b/skills/config/motions.yaml @@ -39,4 +39,29 @@ play_motion: joints: [head_1_joint, head_2_joint] points: - positions: [0.0, 0.0] + time_from_start: 0.0 + raise_torso: + joints: [torso_lift_joint] + points: + - positions: [0.5] + time_from_start: 0.0 + pointing_to_the_right: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.70, -1.47, 0.38, 1.92, 1.66, 0.42, 0.08] + time_from_start: 0.0 + pointing_to_the_left: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.32, -1.42, -0.39, 2.05, 1.62, 0.56, 0.09] + time_from_start: 0.0 + raising_right_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [0.12, 0.09, -3.16, 1.65, 1.53, 0.00, 0.13] + time_from_start: 0.0 + raising_left_arm: + joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] + points: + - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13] time_from_start: 0.0 \ No newline at end of file diff --git a/skills/src/lasr_skills/__init__.py b/skills/src/lasr_skills/__init__.py index 736ee4823..4bceb19ca 100755 --- a/skills/src/lasr_skills/__init__.py +++ b/skills/src/lasr_skills/__init__.py @@ -21,3 +21,4 @@ from .recognise import Recognise from .detect_pointing import DetectPointingDirection from .detect_gesture import DetectGesture +from .find_gesture_person import FindGesturePerson diff --git a/skills/src/lasr_skills/detect_gesture.py b/skills/src/lasr_skills/detect_gesture.py index 5ac565366..28d4f84cb 100755 --- a/skills/src/lasr_skills/detect_gesture.py +++ b/skills/src/lasr_skills/detect_gesture.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 +from typing import Optional import smach import rospy +import cv2 +import cv2_img from lasr_skills.vision import GetImage +from lasr_skills import PlayMotion from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest from lasr_vision_msgs.msg import BodyPixMaskRequest +from sensor_msgs.msg import Image class DetectGesture(smach.State): @@ -11,16 +16,25 @@ class DetectGesture(smach.State): State for detecting gestures. """ - def __init__(self, gesture_to_detect: str = "raising_left_hand"): - self.gesture_to_detect = gesture_to_detect + def __init__( + self, + gesture_to_detect: Optional[str] = None, + buffer_width: int = 50, + ): + """Optionally stores the gesture to detect. If None, it will infer the gesture from the keypoints.""" smach.State.__init__( self, outcomes=["succeeded", "failed"], input_keys=["img_msg"], output_keys=["gesture_detected"], ) - + self.debug = debug + self.gesture_to_detect = gesture_to_detect self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection) + self.debug_publisher = rospy.Publisher( + "/gesture_detection/debug", Image, queue_size=1 + ) + self.buffer_width = buffer_width def execute(self, userdata): @@ -54,29 +68,67 @@ def execute(self, userdata): "y": keypoint.xy[1], "score": keypoint.score, } - - if self.gesture_to_detect == "raising_left_hand": + if ( + self.gesture_to_detect == "raising_left_arm" + or self.gesture_to_detect is None + ): if part_info["leftWrist"]["y"] < part_info["leftShoulder"]["y"]: - userdata.gesture_detected = True - else: - userdata.gesture_detected = False - elif self.gesture_to_detect == "raising_right_hand": + self.gesture_to_detect = "raising_left_arm" + if ( + self.gesture_to_detect == "raising_right_arm" + or self.gesture_to_detect is None + ): if part_info["rightWrist"]["y"] < part_info["rightShoulder"]["y"]: - userdata.gesture_detected = True - else: - userdata.gesture_detected = False - else: - raise ValueError("Invalid gesture to detect") + self.gesture_to_detect = "raising_right_arm" + if ( + self.gesture_to_detect == "pointing_to_the_left" + or self.gesture_to_detect is None + ): + if ( + part_info["leftWrist"]["x"] - self.buffer_width + > part_info["leftShoulder"]["x"] + ): + self.gesture_to_detect = "pointing_to_the_left" + if ( + self.gesture_to_detect == "pointing_to_the_right" + or self.gesture_to_detect is None + ): + if ( + part_info["rightShoulder"]["x"] - self.buffer_width + > part_info["rightWrist"]["x"] + ): + self.gesture_to_detect = "pointing_to_the_right" + + if self.gesture_to_detect is None: + self.gesture_to_detect = "none" + + userdata.gesture_detected = self.gesture_to_detect + + cv2_gesture_img = cv2_img.msg_to_cv2_img(userdata.img_msg) + # Add text to the image + cv2.putText( + cv2_gesture_img, + self.gesture_to_detect, + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (0, 255, 0), + 2, + cv2.LINE_AA, + ) + # Publish the image + self.debug_publisher.publish(cv2_img.cv2_img_to_msg(cv2_gesture_img)) return "succeeded" +### For example usage: class GestureDetectionSM(smach.StateMachine): """ State machine for detecting gestures. """ - def __init__(self, gesture_to_detect: str = "raising_left_hand"): + def __init__(self, gesture_to_detect: Optional[str] = None): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) self.gesture_to_detect = gesture_to_detect with self: @@ -95,9 +147,22 @@ def __init__(self, gesture_to_detect: str = "raising_left_hand"): if __name__ == "__main__": rospy.init_node("gesture_detection_sm") + ### Example usage: while not rospy.is_shutdown(): sm = GestureDetectionSM() sm.execute() - print("Raising Left Hand Detected:", sm.userdata.gesture_detected) + gesture_state = PlayMotion(motion_name=sm.userdata.gesture_detected) + gesture_sm = smach.StateMachine(outcomes=["succeeded", "failed"]) + with gesture_sm: + smach.StateMachine.add( + "GESTURE_STATE", + gesture_state, + transitions={ + "succeeded": "succeeded", + "aborted": "failed", + "preempted": "failed", + }, + ) + gesture_sm.execute() rospy.spin() diff --git a/skills/src/lasr_skills/detect_pointing.py b/skills/src/lasr_skills/detect_pointing.py deleted file mode 100644 index 21895dfc1..000000000 --- a/skills/src/lasr_skills/detect_pointing.py +++ /dev/null @@ -1,26 +0,0 @@ -import rospy -import smach -from sensor_msgs.msg import Image - -from lasr_vision_msgs.srv import PointingDirection - - -class DetectPointingDirection(smach.State): - def __init__(self, image_topic: str = "/xtion/rgb/image_raw"): - smach.State.__init__( - self, outcomes=["succeeded", "failed"], output_keys=["pointing_direction"] - ) - self._image_topic = image_topic - self._pointing_service = rospy.ServiceProxy( - "/pointing_detection_service", PointingDirection - ) - self._pointing_service.wait_for_service() - - def execute(self, userdata): - - img_msg = rospy.wait_for_message(self._image_topic, Image) - resp = self._pointing_service(img_msg) - if resp.direction == "NONE" or resp.direction == "FORWARDS": - return "failed" - userdata.pointing_direction = resp.direction - return "succeeded" diff --git a/skills/src/lasr_skills/find_gesture_person.py b/skills/src/lasr_skills/find_gesture_person.py new file mode 100644 index 000000000..ae195494a --- /dev/null +++ b/skills/src/lasr_skills/find_gesture_person.py @@ -0,0 +1,221 @@ +import smach +import rospy + +from lasr_skills import Detect3D, GoToLocation, DetectGesture, PlayMotion +from lasr_skills.vision import GetImage +import navigation_helpers + +from geometry_msgs.msg import Pose, PoseWithCovarianceStamped, Point, Quaternion + + +from typing import List, Union + + +class FindGesturePerson(smach.StateMachine): + class GetLocation(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["location_index", "waypoints"], + output_keys=["location"], + ) + + def execute(self, userdata): + userdata.location = userdata.waypoints[userdata.location_index] + return "succeeded" + + class GetPose(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + output_keys=["current_pose"], + ) + + def execute(self, userdata): + userdata.current_pose = rospy.wait_for_message( + "/amcl_pose", PoseWithCovarianceStamped + ).pose.pose + return "succeeded" + + class ComputePath(smach.State): + def __init__(self, waypoints: List[Pose]): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["current_pose"], + output_keys=["waypoints"], + ) + self._waypoints = waypoints + + def execute(self, userdata): + userdata.waypoints = navigation_helpers.min_hamiltonian_path( + userdata.current_pose, self._waypoints + ) + return "succeeded" + + class HandleDetections(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["detections_3d"], + output_keys=["person_point"], + ) + + def execute(self, userdata): + if len(userdata.detections_3d.detected_objects) == 0: + rospy.logwarn("No person detected, returning failed.") + return "failed" + userdata.person_point = userdata.detections_3d.detected_objects[0].point + return "succeeded" + + class HandleResponse(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["gesture_detected"], + ) + + def execute(self, userdata): + if userdata.gesture_detected: + return "succeeded" + return "failed" + + def __init__( + self, + gesture: str, + waypoints: Union[List[Pose], None] = None, + location_param: Union[str, None] = None, + ): + smach.StateMachine.__init__( + self, outcomes=["succeeded", "failed"], output_keys=["person_point"] + ) + if waypoints is None and location_param is None: + raise ValueError("Either waypoints or location_param must be provided") + + if gesture == "person raising their left arm": + self.gesture = "raising_left_arm" + elif gesture == "person raising their right arm": + self.gesture = "raising_right_arm" + elif gesture == "person pointing to the right": + self.gesture == "pointing_to_the_right" + elif gesture == "person pointing to the left": + self.gesture == "pointing_to_the_left" + else: + raise ValueError(f"Gesture {gesture} not recognized") + + if waypoints is None: + waypoints_to_iterate: List[Pose] = [] + + room = rospy.get_param(location_param) + beacons = room["beacons"] + for beacon in beacons: + waypoint: Pose = Pose( + position=Point(**beacons[beacon]["near_pose"]["position"]), + orientation=Quaternion( + **beacons[beacon]["near_pose"]["orientation"] + ), + ) + waypoints_to_iterate.append(waypoint) + else: + waypoints_to_iterate: List[Pose] = waypoints + + with self: + smach.StateMachine.add( + "GET_POSE", + self.GetPose(), + transitions={"succeeded": "COMPUTE_PATH", "failed": "failed"}, + ) + + smach.StateMachine.add( + "COMPUTE_PATH", + self.ComputePath(waypoints_to_iterate), + transitions={"succeeded": "WAYPOINT_ITERATOR", "failed": "failed"}, + ) + + waypoint_iterator = smach.Iterator( + outcomes=["succeeded", "failed"], + it=lambda: range(len(waypoints_to_iterate)), + it_label="location_index", + input_keys=["waypoints"], + output_keys=["person_point"], + exhausted_outcome="failed", + ) + + with waypoint_iterator: + container_sm = smach.StateMachine( + outcomes=["succeeded", "failed", "continue"], + input_keys=["location_index", "waypoints"], + output_keys=["person_point"], + ) + + with container_sm: + smach.StateMachine.add( + "GET_LOCATION", + self.GetLocation(), + transitions={"succeeded": "GO_TO_LOCATION", "failed": "failed"}, + ) + + smach.StateMachine.add( + "GO_TO_LOCATION", + GoToLocation(), + transitions={ + "succeeded": "DETECT3D", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "DETECT3D", + Detect3D(filter=["person"]), + transitions={ + "succeeded": "HANDLE_DETECTIONS", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "HANDLE_DETECTIONS", + self.HandleDetections(), + transitions={ + "succeeded": "PLAY_MOTION", + "failed": "continue", + }, + ), + smach.StateMachine.add( + "PLAY_MOTION", + PlayMotion(motion_name="raise_torso"), + transitions={ + "succeeded": "GET_IMAGE", + "aborted": "failed", + "preempted": "failed", + }, + ), + smach.StateMachine.add( + "GET_IMAGE", + GetImage(), + transitions={"succeeded": "CHECK_GESTURE", "failed": "failed"}, + ) + smach.StateMachine.add( + "CHECK_GESTURE", + DetectGesture(self.gesture), + transitions={ + "succeeded": "HANDLE_RESPONSE", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "HANDLE_RESPONSE", + self.HandleResponse(), + transitions={"succeeded": "succeeded", "failed": "continue"}, + ) + + waypoint_iterator.set_contained_state( + "CONTAINER_STATE", container_sm, loop_outcomes=["continue"] + ) + smach.StateMachine.add( + "WAYPOINT_ITERATOR", + waypoint_iterator, + {"succeeded": "succeeded", "failed": "failed"}, + ) diff --git a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py index 39cd67ce5..00c743c20 100644 --- a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py +++ b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py @@ -4,7 +4,7 @@ WaitForPerson, DetectPointingDirection, Say, - PlayMotion, + DetectGesture, ReceiveObject, ) @@ -13,6 +13,24 @@ class CarryMyLuggage(smach.StateMachine): + class ProcessPointingDirection(smach.State): + def __init__(self): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["gesture_detected"], + output_keys=["pointing_direction"], + ) + + def execute(self, userdata): + if userdata.gesture_detected == "pointing_to_the_left": + userdata.pointing_direction = "left" + elif userdata.gesture_detected == "pointing_to_the_right": + userdata.pointing_direction = "right" + else: + return "failed" + return "succeeded" + def __init__(self): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) @@ -28,7 +46,16 @@ def __init__(self): smach.StateMachine.add( "DETECT_POINTING_DIRECTION", - DetectPointingDirection(), + DetectGesture(), + transitions={ + "succeeded": "PROCESS_POINTING_DIRECTION", + "failed": "SAY_FAILED_POINTING", + }, + ) + + smach.StateMachine.add( + "PROCESS_POINTING_DIRECTION", + CarryMyLuggage.ProcessPointingDirection(), transitions={ "succeeded": "SAY_BAG", "failed": "SAY_FAILED_POINTING", diff --git a/tasks/gpsr/config/lab.yaml b/tasks/gpsr/config/lab.yaml index 96cd8cd52..f62e2c4c8 100644 --- a/tasks/gpsr/config/lab.yaml +++ b/tasks/gpsr/config/lab.yaml @@ -9,8 +9,8 @@ arena: beacons: tv_desk: near_pose: - position: {x: 0.96, y: 0.82, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: 0.76, w: 0.65} + position: {x: 0.989603867147269, y: 0.47950265241289325, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: 0.738945375913055, w: 0.6737653385391044} near_polygon: [[-0.11, 0.00], [-0.08, 2.36], [1.84, 2.75], [2.02, 1.31]] search_pose: position: {x: 0.97, y: 1.60, z: 0.0} @@ -18,8 +18,8 @@ arena: search_polygon: [[0.40, 1.92], [0.31, 2.45], [1.36, 2.70], [1.456, 2.17]] window_desk: near_pose: - position: {x: 2.38, y: -3.0, z: 0.0} - orientation: {x: 0.0, y: 0.0, z: -0.62, w: 0.79} + position: {x: 2.6771759491107954, y: -2.2998999761206105, z: 0.0} + orientation: {x: 0.0, y: 0.0, z: -0.6148810103509047, w: 0.7886198977389871} near_polygon: [[3.34, -3.27], [3.75, -4.80], [2.47, -5.20], [1.75, -3.58]] search_pose: position: {x: 2.47, y: -3.73, z: 0.0} diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py index 0208686dd..2e005919b 100644 --- a/tasks/gpsr/src/gpsr/state_machine_factory.py +++ b/tasks/gpsr/src/gpsr/state_machine_factory.py @@ -2,7 +2,7 @@ import rospy import smach from typing import Dict, List -from lasr_skills import GoToLocation, FindNamedPerson # type: ignore +from lasr_skills import GoToLocation, FindNamedPerson, FindGesturePerson # type: ignore from gpsr.states import Talk @@ -43,11 +43,25 @@ def _handle_find_command(self, command_param: Dict): def _handle_talk_command(self, command_param: Dict): if "gesture" in command_param: - raise NotImplementedError("Talk command with gesture not implemented") - elif "text" in command_param: + if not "location" in command_param: + raise ValueError( + "Talk command with gesture but no room in command parameters" + ) + location_param_room = f"/gpsr/arena/rooms/{command_param['location']}" + self.sm.add( + f"STATE_{self._increment_state_count()}", + FindGesturePerson( + location_param=location_param_room, gesture=command_param["gesture"] + ), + transitions={ + "succeeded": f"STATE_{self.state_count + 1}", + "failed": "failed", + }, + ) + if "talk" in command_param: self.sm.add( f"STATE_{self._increment_state_count()}", - Talk(text=command_param["text"]), + Talk(talk_phrase=command_param["talk"]), transitions={ "succeeded": f"STATE_{self.state_count + 1}", "failed": "failed", @@ -143,8 +157,8 @@ def build_state_machine(self) -> smach.StateMachine: raise ValueError(f"Invalid command verb: {command_verb}") self.sm.add( - f"STATE_{self.state_count}", - Talk(text="I have completed the task."), + f"STATE_{self.state_count+1}", + Talk(talk_phrase="I have completed the task."), transitions={"succeeded": "succeeded", "failed": "failed"}, ) From d7c3ca98609481dd80b5cb3bf8193831b6728372 Mon Sep 17 00:00:00 2001 From: fireblonde <71558254+fireblonde@users.noreply.github.com> Date: Fri, 24 May 2024 12:30:47 +0100 Subject: [PATCH 2/3] feat: look at person (#166) * refactor: use correct naming conventions. * feat/refactor: introduce generic object detection skills, modularise from this. * refactor: let the generic perception skills acquire their own sensor input (image/pcl). * refactor/chore: imports and add LookForSeats stub. * feat: always publish markers for debugging, when doing 3D point estimation. * WIP: look for place to seat people. * chore: default sample rate. * chore: setup receptionist demo points. * fix: sleep. * feat: LookToPoint skill. * chore: reorganise receptionist Rasa data and pipeline. * feat: additional motions. * chore: update receptionist demo. * feat: look for chair and seat guest. * WIP: get guest name. * feat: be more informative. * fix: set unknown. * feat: announce name as well as drink. * refactor: ignore point. * feat: be more explicit; look at the chair and semantically describe its location. * WIP. * feat: add option to specify device to use for whisper. * feat: simple transcribe microphone service. * refactor: make transcribe and parse service use the new 'simple' transcribe audio service. * feat: terminate audio interface. * chore: update CMakeLists. * refactor: acquire microphone when initialising service. * chore: update launch file. * fix: remove call to redundant method, * fix: error handling. * feat/fix: pre-load whisper model by prompting it, adjust for ambient noise at the beginning. * chore: configuration required for deploying docs * docs: update container documentation * ci: first draft of documentation build through actions * ci: remove submodules * ci: use ubuntu 20.04 * ci: try using ros:noetic container image * ci: catkin tools isn't available in container * ci: remove extranous source line * ci: try the one-liner ROS installer * ci: use the official ROS noetic installation guide * ci: install catkin tools * ci: try sourcing before commands * ci: deploy to github pages (no stage check yet) * ci: use deploy key * ci: only deploy if on main * chore: update receptionist data and pipeline. * fix: explicitly set spacy NLP model. * fix: drop age for now, since spacy is broken(?). * feat: point and raise_torso motions. * feat: point to chair. * feat: additional motions. * fix: reset motions. * fix: handle empty transcriptions. * fix: enable start state. * fix: properly reset motions * fix(ci): use github_token for gh pages deploy. * revert: fix(ci): use github_token for gh pages deploy. * feat: save microphone output to audio files * chore: update gitignore with python setup files * fix: filename typo and add to cmake * testtesttest * Test * Test * Message Change * fixed message writting and reading * fixed image crop * Updated the feature extraction node and changed the messages. * fixed that torsal and head frames were inversed. * Changed colour format from BGR to RGB within the detection process. * keep the saved model * keep the model file * keep the saved model * Cancel saving the images (but sitll cannot see use cv2.imshow) * Runnable demo * added the hair colour distribution matching method * retrained model is very robust so changed the threshold * Moving the head to meet the person. * xyz axis readable. * (Hopefully) Runnable with 3d input. * Speak normally. * Try to move the head. * testtesttest * Test * Test * Message Change * fixed message writting and reading * fixed image crop * Updated the feature extraction node and changed the messages. * fixed that torsal and head frames were inversed. * Changed colour format from BGR to RGB within the detection process. * keep the saved model * keep the model file * keep the saved model * Cancel saving the images (but sitll cannot see use cv2.imshow) * Runnable demo * added the hair colour distribution matching method * retrained model is very robust so changed the threshold * Moving the head to meet the person. * xyz axis readable. * (Hopefully) Runnable with 3d input. * ah * At least the head moves, not looking at me though. * Cleaned the file to have only one model appear. * Replace the old model with the new one. * correct the lost module. * info update * fixed issues in the service * fixed a stupic typo * runnable version for full demo * Recover the state machine for demo. * Added a simple loop to refresh the frame taken, should work fine. * adding learn_face * adding detect faces * Add detect faces state * editing detect phases * Implement state for learning faces * adding in extra states * any changes * adjustments when testing * push lol * finished merging? * new state machine * merging from rexy laptop * fixed detect faces * getting rid of lasr_speech * swapping from base controllers to skills * fixing state machine after merge with main * making changes * TEST: .idea diff * feat: get name and drink skill & prior data yaml * fix: missing keys for get name & drink constructor * feat: stubs for get attribute state * feat: wrap head manager and lowering base into GoToLocation. * feat: state machine setup. * test: Github tracking fixes * feat: allow AskAndListen to take static text, a format string or reda from userdata. * refactor: being picky about spaces. * fix: import states in dunder init. * feat: get name and drink, and stub for attributes. * fix: typehint for userdata * feat: setup host. * fix: typehint. * fix: refactor ParseNameAndDrink state to convert possible names and drinks to lowercase * refactor: receptionist -> sm_merge (conflicts with python package...) * refactor: remove redundant __init__.py * fix: container consistency. * fix: initialise data correctly. * fix: lower. * fix: uncomment. * feat: take seating area pose and polygon as input to sm. * feat: guide guest to seating area. * feat: add introduce state * feat: add introduction to state machine * feat(WIP): find empty seat. * fix: sneaky parenthesis stops boolifying. * fix: correctly access guest attribute data * fix: get introduce working. * feat(WIP, still): find an empty seat. * feat/refactor: SeatGuest is working and rename. * feat: update init.py * refactor: static motions. * feat: seat guest in state machine. * feat: send head back to default. * refactor: center -> centre. * feat: logic for second guest and pass guest id through constructors. * fix: transition. * refactor: drop unused method. * Remove print statement and add TODO. * fix: namespace. * fix: transition and assume PlayMotion succeeded. * test: yaml setup for testing. * fix: transitions. * refactor: remove redundant comment. * fix: dumb intersection bug. * feat: clip skill for VQA * feat: attempt of clip for attributes * fix: missing imports * feat: tidied clip attributes * refactor: cleanup. * refactor: cleanup again. * refactor: massive cleanup. * refactor: remove hair colour images. * refactor: cleanup cv2_img. * refactor: move Vqa service message into lasr_vision_msgs. * fix: import. * cleanup: remove vision changes. * refactor: cleanup clip vqa skill. * refactor: cleanup describe people skill. * refactor: cleanup vision msgs. * refactor: remove shebang and unused import. * refactor: remove unnecessary message. * refactor: make QueryImage a ServiceState. * refactor: make QueryImage a ServiceState. * build: CMake and requirements. * feat: fix the msgs for bodypix * feat: test initial segment person * feat: filter the poses liek the masks * feat: filter the poses liek the masks * chore: add pcl to img msg and keep the same timestampt * chore: change the look to point to use pointstamp instead * chore: add the 3d image skill * feat: working look at person * chore: cleanup for PR * fix: the PR comments + add head manager disable/enable * feat: wip * feat: working wip * feat: look at multiple people working * chore: prep for PR * chore: reformat agaaaain --------- Co-authored-by: Jared Swift Co-authored-by: Paul Makles Co-authored-by: m-barker Co-authored-by: Benteng Ma Co-authored-by: MBT <59250708+MBTMBTMBT@users.noreply.github.com> Co-authored-by: tiago Co-authored-by: Zoe Co-authored-by: Aaliyah Merchant Co-authored-by: Haiwei Luo --- .../helpers/cv2_pcl/src/cv2_pcl/__init__.py | 11 + .../src/lasr_vision_bodypix/bodypix.py | 1 - .../lasr_vision_msgs/msg/BodyPixPose.msg | 1 - skills/src/lasr_skills/detect_faces.py | 19 +- skills/src/lasr_skills/look_at_person.py | 282 ++++++++++++++++++ skills/src/lasr_skills/look_to_point.py | 7 +- tasks/receptionist/launch/setup.launch | 1 - 7 files changed, 309 insertions(+), 13 deletions(-) create mode 100755 skills/src/lasr_skills/look_at_person.py diff --git a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py index 05f3ef901..dfc27415c 100644 --- a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py +++ b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py @@ -2,12 +2,23 @@ from sensor_msgs.msg import PointCloud2 import ros_numpy as rnp import cv2 +from cv2_img import cv2_img_to_msg from typing import Tuple, Union Mat = np.ndarray +def pcl_to_img_msg(pcl: PointCloud2) -> Mat: + """ + Convert a given PointCloud2 message to img_msg + """ + # keep the same timestamp + cv2 = pcl_to_cv2(pcl) + + return cv2_img_to_msg(cv2, pcl.header.stamp) + + def pcl_to_cv2( pcl: PointCloud2, height: Union[int, None] = None, width: Union[int, None] = None ) -> Mat: diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py index dcd811b89..b6afa0f6a 100644 --- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py +++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py @@ -114,7 +114,6 @@ def detect( keypoints_msg = [] for i, keypoint in pose.keypoints.items(): - if camel_to_snake(keypoint.part) in request.masks[0].parts: keypoint_msg = BodyPixKeypoint() keypoint_msg.xy = [int(keypoint.position.x), int(keypoint.position.y)] diff --git a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg index 1ba8b9f7d..0d416eeae 100644 --- a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg +++ b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg @@ -1,2 +1 @@ -# keypoints BodyPixKeypoint[] keypoints \ No newline at end of file diff --git a/skills/src/lasr_skills/detect_faces.py b/skills/src/lasr_skills/detect_faces.py index c0605b3f8..55af6204a 100644 --- a/skills/src/lasr_skills/detect_faces.py +++ b/skills/src/lasr_skills/detect_faces.py @@ -3,21 +3,30 @@ from lasr_vision_msgs.srv import DetectFaces as DetectFacesSrv from sensor_msgs.msg import Image +from cv2_pcl import pcl_to_img_msg class DetectFaces(smach.State): - - def __init__(self, image_topic: str = "/xtion/rgb/image_raw"): + def __init__( + self, + image_topic: str = "/xtion/rgb/image_raw", + ): smach.State.__init__( - self, outcomes=["succeeded", "failed"], output_keys=["detections"] + self, + outcomes=["succeeded", "failed"], + input_keys=["pcl_msg"], + output_keys=["detections"], ) + self.img_msg = None self._image_topic = image_topic self._detect_faces = rospy.ServiceProxy("/detect_faces", DetectFacesSrv) def execute(self, userdata): - img_msg = rospy.wait_for_message(self._image_topic, Image) + self.img_msg = pcl_to_img_msg(userdata.pcl_msg) + if not self.img_msg: + self.img_msg = rospy.wait_for_message(self._image_topic, Image) try: - result = self._detect_faces(img_msg) + result = self._detect_faces(self.img_msg) userdata.detections = result return "succeeded" except rospy.ServiceException as e: diff --git a/skills/src/lasr_skills/look_at_person.py b/skills/src/lasr_skills/look_at_person.py new file mode 100755 index 000000000..1fc477518 --- /dev/null +++ b/skills/src/lasr_skills/look_at_person.py @@ -0,0 +1,282 @@ +import smach_ros +from geometry_msgs.msg import PointStamped +import smach +from vision import GetPointCloud +from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest +from lasr_vision_msgs.msg import BodyPixMaskRequest +from lasr_skills import LookToPoint, DetectFaces +import cv2 +import rospy +from sensor_msgs.msg import Image +from cv2_img import cv2_img_to_msg, msg_to_cv2_img +from markers import create_and_publish_marker +from visualization_msgs.msg import Marker +from cv2_pcl import pcl_to_img_msg +import ros_numpy as rnp +import rosservice +from smach import CBState +from std_msgs.msg import String +import actionlib +from control_msgs.msg import PointHeadAction, PointHeadGoal +from geometry_msgs.msg import Point + +PUBLIC_CONTAINER = False + +try: + from pal_startup_msgs.srv import ( + StartupStart, + StartupStop, + StartupStartRequest, + StartupStopRequest, + ) +except ModuleNotFoundError: + PUBLIC_CONTAINER = True + + +class LookAtPerson(smach.StateMachine): + class CheckEyes(smach.State): + def __init__(self, debug=True): + smach.State.__init__( + self, + outcomes=["succeeded", "failed", "no_detection"], + input_keys=["bbox_eyes", "pcl_msg", "masks", "detections"], + output_keys=["pointstamped"], + ) + self.DEBUG = debug + self.img_pub = rospy.Publisher("/debug/image", Image, queue_size=1) + self.marker_pub = rospy.Publisher("eyes", Marker, queue_size=1) + self.look_at_pub = actionlib.SimpleActionClient( + "/head_controller/point_head_action", PointHeadAction + ) + + def execute(self, userdata): + rospy.sleep(1) + if len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) > 0: + return "succeeded" + elif ( + len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) < 1 + ): + return "no_detection" + + for det in userdata.bbox_eyes: + left_eye = det["left_eye"] + right_eye = det["right_eye"] + eye_point = (left_eye[0] + right_eye[0]) / 2, ( + left_eye[1] + right_eye[1] + ) / 2 + + if self.DEBUG: + img = msg_to_cv2_img(pcl_to_img_msg(userdata.pcl_msg)) + cv2.circle(img, (left_eye[0], left_eye[1]), 5, (0, 255, 0), -1) + cv2.circle(img, (right_eye[0], right_eye[1]), 5, (0, 255, 0), -1) + img_msg1 = cv2_img_to_msg(img) + self.img_pub.publish(img_msg1) + + pcl_xyz = rnp.point_cloud2.pointcloud2_to_xyz_array( + userdata.pcl_msg, remove_nans=False + ) + eye_point_pcl = pcl_xyz[int(eye_point[1]), int(eye_point[0])] + if any([True for i in eye_point_pcl if i != i]): + eye_point_pcl = pcl_xyz[int(right_eye[1]), int(right_eye[0])] + + look_at = PointStamped() + look_at.header = userdata.pcl_msg.header + look_at.point.x = eye_point_pcl[0] + look_at.point.y = eye_point_pcl[1] + look_at.point.z = eye_point_pcl[2] + + if self.DEBUG: + create_and_publish_marker(self.marker_pub, look_at, r=0, g=1, b=0) + + userdata.bbox_eyes.remove(det) + userdata.pointstamped = look_at + + self.look_at_pub.wait_for_server() + goal = PointHeadGoal() + goal.pointing_frame = "head_2_link" + goal.pointing_axis = Point(1.0, 0.0, 0.0) + goal.max_velocity = 1.0 + goal.target = look_at + self.look_at_pub.send_goal(goal) + + return "succeeded" + + return "failed" + + @smach.cb_interface(input_keys=["poses", "detections"], output_keys=["bbox_eyes"]) + def match_poses_and_detections(ud): + bbox_eyes = [] + for pose in ud.poses: + for detection in ud.detections.detections: + temp = { + "bbox": detection.xywh, + } + for keypoint in pose.keypoints: + if ( + keypoint.part == "leftEye" + and detection.xywh[0] + < keypoint.xy[0] + < detection.xywh[0] + detection.xywh[2] + and detection.xywh[1] + < keypoint.xy[1] + < detection.xywh[1] + detection.xywh[3] + ): + temp["left_eye"] = keypoint.xy + if ( + keypoint.part == "rightEye" + and detection.xywh[0] + < keypoint.xy[0] + < detection.xywh[0] + detection.xywh[2] + and detection.xywh[1] + < keypoint.xy[1] + < detection.xywh[1] + detection.xywh[3] + ): + temp["right_eye"] = keypoint.xy + + if "left_eye" in temp and "right_eye" in temp: + bbox_eyes.append(temp) + + ud.bbox_eyes = bbox_eyes + + return "succeeded" + + def __init__(self): + super(LookAtPerson, self).__init__( + outcomes=["succeeded", "failed"], + input_keys=[], + output_keys=["masks", "poses", "pointstamped"], + ) + self.DEBUG = rospy.get_param("/debug", True) + IS_SIMULATION = ( + "/pal_startup_control/start" not in rosservice.get_service_list() + ) + + with self: + if not IS_SIMULATION: + if PUBLIC_CONTAINER: + rospy.logwarn( + "You are using a public container. The head manager will not be stopped during navigation." + ) + else: + smach.StateMachine.add( + "DISABLE_HEAD_MANAGER", + smach_ros.ServiceState( + "/pal_startup_control/stop", + StartupStop, + request=StartupStopRequest("head_manager"), + ), + transitions={ + "succeeded": "GET_IMAGE", + "aborted": "failed", + "preempted": "failed", + }, + ) + smach.StateMachine.add( + "GET_IMAGE", + GetPointCloud("/xtion/depth_registered/points"), + transitions={ + "succeeded": "SEGMENT_PERSON", + }, + remapping={"pcl_msg": "pcl_msg"}, + ) + + eyes = BodyPixMaskRequest() + eyes.parts = ["left_eye", "right_eye"] + masks = [eyes] + + smach.StateMachine.add( + "SEGMENT_PERSON", + smach_ros.ServiceState( + "/bodypix/detect", + BodyPixDetection, + request_cb=lambda ud, _: BodyPixDetectionRequest( + pcl_to_img_msg(ud.pcl_msg), "resnet50", 0.7, masks + ), + response_slots=["masks", "poses"], + input_keys=["pcl_msg"], + output_keys=["masks", "poses"], + ), + transitions={ + "succeeded": "DETECT_FACES", + "aborted": "failed", + "preempted": "failed", + }, + ) + smach.StateMachine.add( + "DETECT_FACES", + DetectFaces(), + transitions={ + "succeeded": "MATCH_POSES_AND_DETECTIONS", + "failed": "failed", + }, + remapping={"pcl_msg": "pcl_msg", "detections": "detections"}, + ) + + smach.StateMachine.add( + "MATCH_POSES_AND_DETECTIONS", + CBState( + self.match_poses_and_detections, + input_keys=["poses", "detections"], + output_keys=["poses"], + outcomes=["succeeded", "failed"], + ), + transitions={"succeeded": "CHECK_EYES", "failed": "failed"}, + remapping={"bbox_eyes": "bbox_eyes"}, + ) + smach.StateMachine.add( + "CHECK_EYES", + self.CheckEyes(self.DEBUG), + transitions={ + "succeeded": "LOOK_TO_POINT", + "failed": "failed", + "no_detection": "succeeded", + }, + remapping={ + "pcl_msg": "pcl_msg", + "bbox_eyes": "bbox_eyes", + "pointstamped": "pointstamped", + }, + ) + + smach.StateMachine.add( + "LOOK_TO_POINT", + LookToPoint(), + transitions={ + "succeeded": "LOOP", + "aborted": "failed", + "preempted": "failed", + }, + remapping={"pointstamped": "pointstamped"}, + ) + smach.StateMachine.add( + "LOOP", + smach.CBState( + lambda ud: "succeeded" if len(ud.bbox_eyes) > 0 else "finish", + input_keys=["bbox_eyes"], + output_keys=["bbox_eyes"], + outcomes=["succeeded", "finish"], + ), + transitions={ + "succeeded": "CHECK_EYES", + "finish": "ENABLE_HEAD_MANAGER", + }, + ) + if not IS_SIMULATION: + if PUBLIC_CONTAINER: + rospy.logwarn( + "You are using a public container. The head manager will not be start following navigation." + ) + else: + smach.StateMachine.add( + "ENABLE_HEAD_MANAGER", + smach_ros.ServiceState( + "/pal_startup_control/start", + StartupStart, + request=StartupStartRequest("head_manager", ""), + ), + transitions={ + "succeeded": "succeeded", + "preempted": "failed", + "aborted": "failed", + }, + ) diff --git a/skills/src/lasr_skills/look_to_point.py b/skills/src/lasr_skills/look_to_point.py index 81c633ad7..92fe2e3e1 100755 --- a/skills/src/lasr_skills/look_to_point.py +++ b/skills/src/lasr_skills/look_to_point.py @@ -13,10 +13,7 @@ def __init__(self): pointing_frame="head_2_link", pointing_axis=Point(1.0, 0.0, 0.0), max_velocity=1.0, - target=PointStamped( - header=Header(frame_id="map"), - point=ud.point, - ), + target=ud.pointstamped, ), - input_keys=["point"], + input_keys=["pointstamped"], ) diff --git a/tasks/receptionist/launch/setup.launch b/tasks/receptionist/launch/setup.launch index a8fd79ede..894406256 100644 --- a/tasks/receptionist/launch/setup.launch +++ b/tasks/receptionist/launch/setup.launch @@ -19,5 +19,4 @@ - From 09bae511042c4a97b5173feb60e9efa5138bd353 Mon Sep 17 00:00:00 2001 From: Jared Swift Date: Tue, 28 May 2024 10:06:41 +0100 Subject: [PATCH 3/3] Receptionist fixes 25 04 (#207) * fix: boolify facial hair attribute. * fix: drop host attributes. * feat: use DescribePeople skill. * fix: update guest str generation. * refactor: remove redundant message. --- .../image_with_masks_and_attributes.py | 2 +- tasks/receptionist/scripts/main.py | 6 - .../src/receptionist/states/get_attributes.py | 137 ++++++------------ .../src/receptionist/states/introduce.py | 12 +- 4 files changed, 54 insertions(+), 103 deletions(-) diff --git a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py index c5df408a4..d74fdb450 100644 --- a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py +++ b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py @@ -195,7 +195,7 @@ def describe(self) -> str: "hair_colour": hair_colour_str, "hair_shape": hair_shape_str, "male": male[0], - "facial_hair": facial_hair[0], + "facial_hair": facial_hair[0] != "No_Beard", "hat": hat[0], "glasses": glasses[0], "earrings": earrings[0], diff --git a/tasks/receptionist/scripts/main.py b/tasks/receptionist/scripts/main.py index 808952737..5e9cd28e6 100755 --- a/tasks/receptionist/scripts/main.py +++ b/tasks/receptionist/scripts/main.py @@ -33,12 +33,6 @@ { "name": "John", "drink": "beer", - "attributes": { - "hair_colour": "strawberry blonde", - "glasses": False, - "hat": True, - "height": "tall", - }, }, ) outcome = receptionist.execute() diff --git a/tasks/receptionist/src/receptionist/states/get_attributes.py b/tasks/receptionist/src/receptionist/states/get_attributes.py index 22538a267..2f0dfd3bb 100644 --- a/tasks/receptionist/src/receptionist/states/get_attributes.py +++ b/tasks/receptionist/src/receptionist/states/get_attributes.py @@ -1,103 +1,56 @@ -""" -State for calling the service to get a set of guest attributes. -Currently incomplete. -""" - -import rospy import smach from smach import UserData -from typing import List, Any, Dict, Union -from lasr_vision_clip.srv import VqaRequest, VqaResponse, Vqa +from lasr_skills import DescribePeople +import json + + +class GetGuestAttributes(smach.StateMachine): + + class HandleGuestAttributes(smach.State): + def __init__(self, guest_id: str): + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["people", "guest_data"], + output_keys=["guest_data"], + ) + self._guest_id: str = guest_id + + def execute(self, userdata: UserData) -> str: + if len(userdata.people) == 0: + return "failed" + userdata.guest_data[self._guest_id]["attributes"] = json.loads( + userdata.people[0]["features"] + )["attributes"] + return "succeeded" -class GetGuestAttributes(smach.State): def __init__( self, guest_id: str, - attribute_service: Union[str, None] = None, - outcomes: List[str] = ["succeeded", "failed"], - input_keys: List[str] = ["guest_id", "guest_data"], - output_keys: List[str] = ["guest_data"], ): - """Calls and parses the service that gets a set of guest attributes. - - Args: - attribute_service (str): Name of the service to call that returns the guest's attributes. - """ - - super().__init__( - outcomes=outcomes, - input_keys=input_keys, - output_keys=output_keys, + smach.StateMachine.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["guest_id", "guest_data"], + output_keys=["guest_data"], ) - self._service_proxy = rospy.ServiceProxy("/clip_vqa/query_service", Vqa) self._guest_id: str = guest_id - self._attribute_service: Union[str, None] = attribute_service - - def _call_attribute_service(self): - # TODO - pass - - def _send_vqa_request(self, possible_answers: List[str]) -> str: - request = VqaRequest() - request.possible_answers = possible_answers - response = self._service_proxy(request) - return response.answer - - def execute(self, userdata: UserData) -> str: - if self._attribute_service: - attributes = self._call_attribute_service() - else: - glasses_answers = [ - "a person wearing glasses", - "a person not wearing glasses", - ] - hat_answers = ["a person wearing a hat", "a person not wearing a hat"] - height_answers = ["a tall person", "a short person"] - hair_colour_answers = [ - "a person with black hair", - "a person with blonde hair", - "a person with brown hair", - "a person with red hair", - "a person with grey hair", - "a person with white hair", - ] - - glasses_response = self._send_vqa_request(glasses_answers) - hat_response = self._send_vqa_request(hat_answers) - height_response = self._send_vqa_request(height_answers) - hair_colour_response = self._send_vqa_request(hair_colour_answers) - - if glasses_response == "a person wearing glasses": - glasses = True - else: - glasses = False - if hat_response == "a person wearing a hat": - hat = True - else: - hat = False - if height_response == "a tall person": - height = "tall" - else: - height = "short" - if hair_colour_response == "a person with black hair": - hair_colour = "black" - elif hair_colour_response == "a person with blonde hair": - hair_colour = "blonde" - elif hair_colour_response == "a person with brown hair": - hair_colour = "brown" - elif hair_colour_response == "a person with red hair": - hair_colour = "red" - elif hair_colour_response == "a person with grey hair": - hair_colour = "grey" - - attributes = { - "hair_colour": hair_colour, - "glasses": glasses, - "hat": hat, - "height": height, - } - - userdata.guest_data[self._guest_id]["attributes"] = attributes - return "succeeded" + with self: + smach.StateMachine.add( + "GET_GUEST_ATTRIBUTES", + DescribePeople(), + transitions={ + "succeeded": "HANDLE_GUEST_ATTRIBUTES", + "failed": "failed", + }, + ) + smach.StateMachine.add( + "HANDLE_GUEST_ATTRIBUTES", + self.HandleGuestAttributes(self._guest_id), + transitions={ + "succeeded": "succeeded", + "failed": "failed", + }, + ) diff --git a/tasks/receptionist/src/receptionist/states/introduce.py b/tasks/receptionist/src/receptionist/states/introduce.py index cedb54740..77dcc96d9 100644 --- a/tasks/receptionist/src/receptionist/states/introduce.py +++ b/tasks/receptionist/src/receptionist/states/introduce.py @@ -27,10 +27,14 @@ def stringify_guest_data(guest_data: Dict[str, Any], guest_id: str) -> str: guest_str = "" - guest_str += f"{relevant_guest_data['name']}, their favourite drink is {relevant_guest_data['drink']}. " - guest_str += f"They have {relevant_guest_data['attributes']['hair_colour']} hair, their height is {relevant_guest_data['attributes']['height']}, " - guest_str += f"they are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, and they are " - guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}." + if "attributes" in relevant_guest_data.keys(): + guest_str += f"They have {relevant_guest_data['attributes']['hair_shape']} {relevant_guest_data['attributes']['hair_colour']} hair, and they " + guest_str += f"{'have facial hair' if relevant_guest_data['attributes']['facial_hair'] else 'do not have facial hair'}. " + guest_str += f"They are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, " + guest_str += f"{'wearing earrings' if relevant_guest_data['attributes']['earrings'] else 'not wearing earrings'}, and " + guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}. " + guest_str += f"They are {'wearing a necklace' if relevant_guest_data['attributes']['necklace'] else 'not wearing a necklace'}, and " + guest_str += f"{'wearing a necktie' if relevant_guest_data['attributes']['necktie'] else 'not wearing a necktie'}. " return guest_str