From 953d35c7851b6117aa5b77fa26fb8df9fc26e8c0 Mon Sep 17 00:00:00 2001
From: Matt Barker <105945282+m-barker@users.noreply.github.com>
Date: Tue, 14 May 2024 10:47:43 +0100
Subject: [PATCH 1/3] Gesture detection (#202)
* fix: incorrect remapping keys for handover
* fix: remove speech server in launch file
* feat: add find gesture person skill, with support for raised arms
* feat: add pointing to gesture detection (a usage sketch of the updated skill follows after this list)
* feat: add debug publisher to gesture detection
* feat: add mimicking of the gesture detected by Tiago
* feat: update CML with new pointing API
* feat: update GPSR API with new gesture detection skill
* chore: remove old pointing skill & service
* feat: make debug always be published
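
A minimal usage sketch of the reworked DetectGesture skill (illustrative only,
not part of the diff; it assumes the BodyPix service at /bodypix/detect is
running and the lasr_skills package is built; the node name and wiring are
made up for illustration):

    import rospy
    import smach
    from lasr_skills import DetectGesture
    from lasr_skills.vision import GetImage

    rospy.init_node("gesture_detection_example")

    sm = smach.StateMachine(outcomes=["succeeded", "failed"])
    with sm:
        # Grab a single image from the robot's camera.
        smach.StateMachine.add(
            "GET_IMAGE",
            GetImage(),
            transitions={"succeeded": "DETECT_GESTURE", "failed": "failed"},
        )
        # gesture_to_detect=None lets the skill infer the gesture (raised arm
        # or pointing direction) from the BodyPix keypoints.
        smach.StateMachine.add(
            "DETECT_GESTURE",
            DetectGesture(gesture_to_detect=None),
            transitions={"succeeded": "succeeded", "failed": "failed"},
        )

    sm.execute()
    print("Detected gesture:", sm.userdata.gesture_detected)
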
---
.../lasr_vision_pointing/CMakeLists.txt | 213 -----------------
.../lasr_vision_pointing/images/.gitkeep | 0
.../vision/lasr_vision_pointing/package.xml | 77 ------
.../lasr_vision_pointing/requirements.in | 1 -
.../lasr_vision_pointing/requirements.txt | 24 --
.../scripts/detect_pointing.py | 175 --------------
common/vision/lasr_vision_pointing/setup.py | 10 -
skills/config/motions.yaml | 25 ++
skills/src/lasr_skills/__init__.py | 1 +
skills/src/lasr_skills/detect_gesture.py | 97 ++++++--
skills/src/lasr_skills/detect_pointing.py | 26 ---
skills/src/lasr_skills/find_gesture_person.py | 221 ++++++++++++++++++
.../src/carry_my_luggage/state_machine.py | 31 ++-
tasks/gpsr/config/lab.yaml | 8 +-
tasks/gpsr/src/gpsr/state_machine_factory.py | 26 ++-
15 files changed, 381 insertions(+), 554 deletions(-)
delete mode 100644 common/vision/lasr_vision_pointing/CMakeLists.txt
delete mode 100644 common/vision/lasr_vision_pointing/images/.gitkeep
delete mode 100644 common/vision/lasr_vision_pointing/package.xml
delete mode 100644 common/vision/lasr_vision_pointing/requirements.in
delete mode 100644 common/vision/lasr_vision_pointing/requirements.txt
delete mode 100644 common/vision/lasr_vision_pointing/scripts/detect_pointing.py
delete mode 100644 common/vision/lasr_vision_pointing/setup.py
delete mode 100644 skills/src/lasr_skills/detect_pointing.py
create mode 100644 skills/src/lasr_skills/find_gesture_person.py
diff --git a/common/vision/lasr_vision_pointing/CMakeLists.txt b/common/vision/lasr_vision_pointing/CMakeLists.txt
deleted file mode 100644
index 66f1cf831..000000000
--- a/common/vision/lasr_vision_pointing/CMakeLists.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-cmake_minimum_required(VERSION 3.0.2)
-project(lasr_vision_pointing)
-
-## Compile as C++11, supported in ROS Kinetic and newer
-# add_compile_options(-std=c++11)
-
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(catkin REQUIRED COMPONENTS
- geometry_msgs
- sensor_msgs
- roscpp
- rospy
- std_msgs
- catkin_virtualenv
-)
-
-## System dependencies are found with CMake's conventions
-# find_package(Boost REQUIRED COMPONENTS system)
-
-
-## Uncomment this if the package has a setup.py. This macro ensures
-## modules and global scripts declared therein get installed
-## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
-catkin_python_setup()
-catkin_generate_virtualenv(
- INPUT_REQUIREMENTS requirements.in
- PYTHON_INTERPRETER python3.10
-)
-
-################################################
-## Declare ROS messages, services and actions ##
-################################################
-
-## To declare and build messages, services or actions from within this
-## package, follow these steps:
-## * Let MSG_DEP_SET be the set of packages whose message types you use in
-## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
-## * In the file package.xml:
-## * add a build_depend tag for "message_generation"
-## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
-## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
-## but can be declared for certainty nonetheless:
-## * add a exec_depend tag for "message_runtime"
-## * In this file (CMakeLists.txt):
-## * add "message_generation" and every package in MSG_DEP_SET to
-## find_package(catkin REQUIRED COMPONENTS ...)
-## * add "message_runtime" and every package in MSG_DEP_SET to
-## catkin_package(CATKIN_DEPENDS ...)
-## * uncomment the add_*_files sections below as needed
-## and list every .msg/.srv/.action file to be processed
-## * uncomment the generate_messages entry below
-## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
-
-## Generate messages in the 'msg' folder
-# add_message_files(
-# FILES
-# Message1.msg
-# Message2.msg
-# )
-
-## Generate services in the 'srv' folder
-# add_service_files(
-# FILES
-# PointingDirection.srv
-# )
-
-## Generate actions in the 'action' folder
-# add_action_files(
-# FILES
-# Action1.action
-# Action2.action
-# )
-
-## Generate added messages and services with any dependencies listed here
-# generate_messages(
-# DEPENDENCIES
-# geometry_msgs
-# std_msgs
-# sensor_msgs
-# )
-
-################################################
-## Declare ROS dynamic reconfigure parameters ##
-################################################
-
-## To declare and build dynamic reconfigure parameters within this
-## package, follow these steps:
-## * In the file package.xml:
-## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
-## * In this file (CMakeLists.txt):
-## * add "dynamic_reconfigure" to
-## find_package(catkin REQUIRED COMPONENTS ...)
-## * uncomment the "generate_dynamic_reconfigure_options" section below
-## and list every .cfg file to be processed
-
-## Generate dynamic reconfigure parameters in the 'cfg' folder
-# generate_dynamic_reconfigure_options(
-# cfg/DynReconf1.cfg
-# cfg/DynReconf2.cfg
-# )
-
-###################################
-## catkin specific configuration ##
-###################################
-## The catkin_package macro generates cmake config files for your package
-## Declare things to be passed to dependent projects
-## INCLUDE_DIRS: uncomment this if your package contains header files
-## LIBRARIES: libraries you create in this project that dependent projects also need
-## CATKIN_DEPENDS: catkin_packages dependent projects also need
-## DEPENDS: system dependencies of this project that dependent projects also need
-catkin_package(
-# INCLUDE_DIRS include
-# LIBRARIES lasr_vision_pointing
- CATKIN_DEPENDS geometry_msgs roscpp rospy std_msgs sensor_msgs
-# DEPENDS system_lib
-)
-
-###########
-## Build ##
-###########
-
-## Specify additional locations of header files
-## Your package locations should be listed before other locations
-include_directories(
-# include
- ${catkin_INCLUDE_DIRS}
-)
-
-## Declare a C++ library
-# add_library(${PROJECT_NAME}
-# src/${PROJECT_NAME}/lasr_vision_pointing.cpp
-# )
-
-## Add cmake target dependencies of the library
-## as an example, code may need to be generated before libraries
-## either from message generation or dynamic reconfigure
-# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
-
-## Declare a C++ executable
-## With catkin_make all packages are built within a single CMake context
-## The recommended prefix ensures that target names across packages don't collide
-# add_executable(${PROJECT_NAME}_node src/lasr_vision_pointing_node.cpp)
-
-## Rename C++ executable without prefix
-## The above recommended prefix causes long target names, the following renames the
-## target back to the shorter version for ease of user use
-## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
-# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
-
-## Add cmake target dependencies of the executable
-## same as for the library above
-# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
-
-## Specify libraries to link a library or executable target against
-# target_link_libraries(${PROJECT_NAME}_node
-# ${catkin_LIBRARIES}
-# )
-
-#############
-## Install ##
-#############
-
-# all install targets should use catkin DESTINATION variables
-# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
-
-## Mark executable scripts (Python etc.) for installation
-## in contrast to setup.py, you can choose the destination
-catkin_install_python(PROGRAMS
- scripts/detect_pointing.py
- DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
-)
-
-## Mark executables for installation
-## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
-# install(TARGETS ${PROJECT_NAME}_node
-# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
-# )
-
-## Mark libraries for installation
-## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
-# install(TARGETS ${PROJECT_NAME}
-# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
-# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
-# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
-# )
-
-## Mark cpp header files for installation
-# install(DIRECTORY include/${PROJECT_NAME}/
-# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
-# FILES_MATCHING PATTERN "*.h"
-# PATTERN ".svn" EXCLUDE
-# )
-
-## Mark other files for installation (e.g. launch and bag files, etc.)
-install(FILES
- requirements.txt
- DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
-)
-
-#############
-## Testing ##
-#############
-
-## Add gtest based cpp test target and link libraries
-# catkin_add_gtest(${PROJECT_NAME}-test test/test_lasr_vision_pointing.cpp)
-# if(TARGET ${PROJECT_NAME}-test)
-# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
-# endif()
-
-## Add folders to be run by python nosetests
-# catkin_add_nosetests(test)
diff --git a/common/vision/lasr_vision_pointing/images/.gitkeep b/common/vision/lasr_vision_pointing/images/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/common/vision/lasr_vision_pointing/package.xml b/common/vision/lasr_vision_pointing/package.xml
deleted file mode 100644
index d99ef49f8..000000000
--- a/common/vision/lasr_vision_pointing/package.xml
+++ /dev/null
@@ -1,77 +0,0 @@
-
-
- lasr_vision_pointing
- 0.0.0
- The lasr_vision_pointing package
-
-
-
-
- Serge Alkhalil
-
-
-
-
-
- TODO
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- catkin
- geometry_msgs
- roscpp
- rospy
- std_msgs
- sensor_msgs
- geometry_msgs
- roscpp
- rospy
- std_msgs
- geometry_msgs
- roscpp
- rospy
- std_msgs
- sensor_msgs
- lasr_vision_msgs
-
-
- catkin_virtualenv
-
-
-
-
- requirements.txt
-
-
-
diff --git a/common/vision/lasr_vision_pointing/requirements.in b/common/vision/lasr_vision_pointing/requirements.in
deleted file mode 100644
index c1b708709..000000000
--- a/common/vision/lasr_vision_pointing/requirements.in
+++ /dev/null
@@ -1 +0,0 @@
-mediapipe==0.10.10
\ No newline at end of file
diff --git a/common/vision/lasr_vision_pointing/requirements.txt b/common/vision/lasr_vision_pointing/requirements.txt
deleted file mode 100644
index e6ef834ab..000000000
--- a/common/vision/lasr_vision_pointing/requirements.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-absl-py==2.1.0 # via mediapipe
-attrs==23.2.0 # via mediapipe
-cffi==1.16.0 # via sounddevice
-contourpy==1.2.1 # via matplotlib
-cycler==0.12.1 # via matplotlib
-flatbuffers==24.3.25 # via mediapipe
-fonttools==4.51.0 # via matplotlib
-jax==0.4.26 # via mediapipe
-kiwisolver==1.4.5 # via matplotlib
-matplotlib==3.8.4 # via mediapipe
-mediapipe==0.10.10 # via -r requirements.in
-ml-dtypes==0.4.0 # via jax
-numpy==1.26.4 # via contourpy, jax, matplotlib, mediapipe, ml-dtypes, opencv-contrib-python, opt-einsum, scipy
-opencv-contrib-python==4.9.0.80 # via mediapipe
-opt-einsum==3.3.0 # via jax
-packaging==24.0 # via matplotlib
-pillow==10.3.0 # via matplotlib
-protobuf==3.20.3 # via mediapipe
-pycparser==2.22 # via cffi
-pyparsing==3.1.2 # via matplotlib
-python-dateutil==2.9.0.post0 # via matplotlib
-scipy==1.13.0 # via jax
-six==1.16.0 # via python-dateutil
-sounddevice==0.4.6 # via mediapipe
diff --git a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py b/common/vision/lasr_vision_pointing/scripts/detect_pointing.py
deleted file mode 100644
index b56e0d14e..000000000
--- a/common/vision/lasr_vision_pointing/scripts/detect_pointing.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import cv2
-import rospkg
-import rospy
-import mediapipe as mp
-from cv_bridge import CvBridge
-from lasr_vision_msgs.srv import (
- YoloDetectionRequest,
- YoloDetection,
- PointingDirection,
- PointingDirectionResponse,
- PointingDirectionRequest,
-)
-
-
-class PointingDetector:
- def __init__(self):
- self.detect_service = rospy.ServiceProxy("/yolov8/detect", YoloDetection)
- self.service = rospy.Service(
- "/pointing_detection_service", PointingDirection, self.excute
- )
- self.counter = 0
-
- # Load MediaPipe Pose model
- self.mp_pose = mp.solutions.pose
- self.pose = self.mp_pose.Pose()
-
- def excute(self, req: PointingDirectionRequest) -> PointingDirectionResponse:
- img = req.image_raw
- resp = PointingDirectionResponse()
- people_bounding_box = self.detection(img)
-
- if people_bounding_box:
- for person in people_bounding_box:
- keypoints = self.detect_keypoints(
- img
- ) # Detect keypoints using MediaPipe
- direction = self.determine_pointing_direction(
- keypoints, buffer_width=25
- )
- rospy.loginfo(f"Person detected pointing: {direction}")
-
- # Visualize pointing direction with landmarks
- image_path = os.path.join(
- rospkg.RosPack().get_path("lasr_vision_pointing"),
- "images",
- f"image{self.counter}.jpg",
- )
- self.visualize_pointing_direction_with_landmarks(
- image_path, person, direction, keypoints
- )
-
- resp.direction = direction
- else:
- resp.direction = "NONE"
-
- self.counter += 1
- return resp
-
- def store_image(self, img):
- package_path = rospkg.RosPack().get_path("lasr_vision_pointing")
- os.chdir(os.path.abspath(os.path.join(package_path, "images")))
- cv2.imwrite(f"image{self.counter}.jpg", img)
-
- def detect_keypoints(self, img):
- img_bridge = CvBridge().imgmsg_to_cv2(img, desired_encoding="passthrough")
- img_rgb = cv2.cvtColor(img_bridge, cv2.COLOR_BGR2RGB)
-
- # store the image
- self.store_image(img_rgb)
-
- results = self.pose.process(img_rgb)
-
- keypoints = []
- if results.pose_landmarks:
- for landmark in results.pose_landmarks.landmark:
- x = int(landmark.x * img.width)
- y = int(landmark.y * img.height)
- keypoints.append((x, y))
-
- return keypoints
-
- def detection(self, img):
- request = YoloDetectionRequest()
- request.image_raw = img # sensor_msgs/Image
- request.dataset = "yolov8n-seg.pt" # YOLOv8 model, auto-downloads
- request.confidence = 0.7 # minimum confidence to include in results
- request.nms = 0.4 # non maximal supression
-
- # send request
- response = self.detect_service(request)
-
- result = []
- for detection in response.detected_objects:
- if detection.name == "person":
- # cords of person in image
- result.append(detection.xywh)
-
- return result
-
- def determine_pointing_direction(self, keypoints, buffer_width=50):
- # Ensure keypoints are available
- if (
- len(keypoints) >= 7
- ): # Ensure we have at least 7 keypoints for the upper body
- # Extract relevant keypoints for shoulders and wrists
- left_shoulder = keypoints[self.mp_pose.PoseLandmark.LEFT_SHOULDER.value]
- right_shoulder = keypoints[self.mp_pose.PoseLandmark.RIGHT_SHOULDER.value]
- left_wrist = keypoints[self.mp_pose.PoseLandmark.LEFT_WRIST.value]
- right_wrist = keypoints[self.mp_pose.PoseLandmark.RIGHT_WRIST.value]
-
- # Ensure all keypoints are detected
- if left_shoulder and right_shoulder and left_wrist and right_wrist:
- # Calculate the x-coordinate difference between shoulders and wrists
- left_diff = left_wrist[0] - left_shoulder[0]
- right_diff = right_shoulder[0] - right_wrist[0]
-
- # Determine pointing direction based on the difference in x-coordinates
- if abs(left_diff - right_diff) < buffer_width:
- return "FORWARDS"
- elif abs(left_diff) > buffer_width and abs(left_diff) > abs(right_diff):
- return "LEFT" if left_diff > 0 else "RIGHT"
- elif abs(right_diff) > buffer_width and abs(right_diff) > abs(
- left_diff
- ):
- return "RIGHT" if right_diff > 0 else "LEFT"
-
- # Default: Determine direction based on the relative position to the center of the image
- return "NONE"
-
- def visualize_pointing_direction_with_landmarks(
- self, image_path, person_bbox, pointing_direction, keypoints
- ):
- # Load image
- img = cv2.imread(image_path)
-
- # Extract person bbox coordinates
- x, y, w, h = person_bbox
- # Calculate center of bbox
- center_x = x + w // 2
- center_y = y + h // 2
-
- # Calculate endpoint of arrow based on pointing direction
- arrow_length = min(w, h) // 2
- if pointing_direction == "LEFT":
- endpoint = (center_x - arrow_length, center_y)
- elif pointing_direction == "RIGHT":
- endpoint = (center_x + arrow_length, center_y)
- elif pointing_direction == "FORWARDS":
- endpoint = (center_x, center_y)
- else:
- return # No pointing direction detected
-
- # Draw arrow on image
- color = (0, 255, 0) # Green color
- thickness = 2
- cv2.arrowedLine(img, (center_x, center_y), endpoint, color, thickness)
-
- # Draw landmarks (keypoints) on the image
- for keypoint in keypoints:
- if keypoint:
- cv2.circle(img, keypoint, 3, (0, 0, 255), -1) # Red color for landmarks
-
- # Store image with pointing direction visualization and landmarks
- output_image_path = f"landmark_image{self.counter}.jpg" # Change the output image path as needed
- cv2.imwrite(output_image_path, img)
-
-
-if __name__ == "__main__":
- rospy.init_node("pointing_detector")
- pointer = PointingDetector()
- rospy.loginfo("Pointing Detector is running")
- rospy.spin()
diff --git a/common/vision/lasr_vision_pointing/setup.py b/common/vision/lasr_vision_pointing/setup.py
deleted file mode 100644
index e094797e8..000000000
--- a/common/vision/lasr_vision_pointing/setup.py
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-
-from distutils.core import setup
-from catkin_pkg.python_setup import generate_distutils_setup
-
-setup_args = generate_distutils_setup(
- packages=["lasr_vision_pointing"], package_dir={"": "src"}
-)
-
-setup(**setup_args)
diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml
index 302eb6083..2b98cc74e 100644
--- a/skills/config/motions.yaml
+++ b/skills/config/motions.yaml
@@ -39,4 +39,29 @@ play_motion:
joints: [head_1_joint, head_2_joint]
points:
- positions: [0.0, 0.0]
+ time_from_start: 0.0
+ raise_torso:
+ joints: [torso_lift_joint]
+ points:
+ - positions: [0.5]
+ time_from_start: 0.0
+ pointing_to_the_right:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [2.70, -1.47, 0.38, 1.92, 1.66, 0.42, 0.08]
+ time_from_start: 0.0
+ pointing_to_the_left:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [0.32, -1.42, -0.39, 2.05, 1.62, 0.56, 0.09]
+ time_from_start: 0.0
+ raising_right_arm:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [0.12, 0.09, -3.16, 1.65, 1.53, 0.00, 0.13]
+ time_from_start: 0.0
+ raising_left_arm:
+ joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint]
+ points:
+ - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13]
time_from_start: 0.0
\ No newline at end of file
diff --git a/skills/src/lasr_skills/__init__.py b/skills/src/lasr_skills/__init__.py
index 736ee4823..4bceb19ca 100755
--- a/skills/src/lasr_skills/__init__.py
+++ b/skills/src/lasr_skills/__init__.py
@@ -21,3 +21,4 @@
from .recognise import Recognise
from .detect_pointing import DetectPointingDirection
from .detect_gesture import DetectGesture
+from .find_gesture_person import FindGesturePerson
diff --git a/skills/src/lasr_skills/detect_gesture.py b/skills/src/lasr_skills/detect_gesture.py
index 5ac565366..28d4f84cb 100755
--- a/skills/src/lasr_skills/detect_gesture.py
+++ b/skills/src/lasr_skills/detect_gesture.py
@@ -1,9 +1,14 @@
#!/usr/bin/env python3
+from typing import Optional
import smach
import rospy
+import cv2
+import cv2_img
from lasr_skills.vision import GetImage
+from lasr_skills import PlayMotion
from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest
from lasr_vision_msgs.msg import BodyPixMaskRequest
+from sensor_msgs.msg import Image
class DetectGesture(smach.State):
@@ -11,16 +16,25 @@ class DetectGesture(smach.State):
State for detecting gestures.
"""
- def __init__(self, gesture_to_detect: str = "raising_left_hand"):
- self.gesture_to_detect = gesture_to_detect
+ def __init__(
+ self,
+ gesture_to_detect: Optional[str] = None,
+ buffer_width: int = 50,
+ ):
+ """Optionally stores the gesture to detect. If None, it will infer the gesture from the keypoints."""
smach.State.__init__(
self,
outcomes=["succeeded", "failed"],
input_keys=["img_msg"],
output_keys=["gesture_detected"],
)
-
+ self.gesture_to_detect = gesture_to_detect
self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)
+ self.debug_publisher = rospy.Publisher(
+ "/gesture_detection/debug", Image, queue_size=1
+ )
+ self.buffer_width = buffer_width
def execute(self, userdata):
@@ -54,29 +68,67 @@ def execute(self, userdata):
"y": keypoint.xy[1],
"score": keypoint.score,
}
-
- if self.gesture_to_detect == "raising_left_hand":
+ if (
+ self.gesture_to_detect == "raising_left_arm"
+ or self.gesture_to_detect is None
+ ):
if part_info["leftWrist"]["y"] < part_info["leftShoulder"]["y"]:
- userdata.gesture_detected = True
- else:
- userdata.gesture_detected = False
- elif self.gesture_to_detect == "raising_right_hand":
+ self.gesture_to_detect = "raising_left_arm"
+ if (
+ self.gesture_to_detect == "raising_right_arm"
+ or self.gesture_to_detect is None
+ ):
if part_info["rightWrist"]["y"] < part_info["rightShoulder"]["y"]:
- userdata.gesture_detected = True
- else:
- userdata.gesture_detected = False
- else:
- raise ValueError("Invalid gesture to detect")
+ self.gesture_to_detect = "raising_right_arm"
+ if (
+ self.gesture_to_detect == "pointing_to_the_left"
+ or self.gesture_to_detect is None
+ ):
+ if (
+ part_info["leftWrist"]["x"] - self.buffer_width
+ > part_info["leftShoulder"]["x"]
+ ):
+ self.gesture_to_detect = "pointing_to_the_left"
+ if (
+ self.gesture_to_detect == "pointing_to_the_right"
+ or self.gesture_to_detect is None
+ ):
+ if (
+ part_info["rightShoulder"]["x"] - self.buffer_width
+ > part_info["rightWrist"]["x"]
+ ):
+ self.gesture_to_detect = "pointing_to_the_right"
+
+ if self.gesture_to_detect is None:
+ self.gesture_to_detect = "none"
+
+ userdata.gesture_detected = self.gesture_to_detect
+
+ cv2_gesture_img = cv2_img.msg_to_cv2_img(userdata.img_msg)
+ # Add text to the image
+ cv2.putText(
+ cv2_gesture_img,
+ self.gesture_to_detect,
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 1,
+ (0, 255, 0),
+ 2,
+ cv2.LINE_AA,
+ )
+ # Publish the image
+ self.debug_publisher.publish(cv2_img.cv2_img_to_msg(cv2_gesture_img))
return "succeeded"
+### Example usage:
class GestureDetectionSM(smach.StateMachine):
"""
State machine for detecting gestures.
"""
- def __init__(self, gesture_to_detect: str = "raising_left_hand"):
+ def __init__(self, gesture_to_detect: Optional[str] = None):
smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"])
self.gesture_to_detect = gesture_to_detect
with self:
@@ -95,9 +147,22 @@ def __init__(self, gesture_to_detect: str = "raising_left_hand"):
if __name__ == "__main__":
rospy.init_node("gesture_detection_sm")
+ ### Example usage:
while not rospy.is_shutdown():
sm = GestureDetectionSM()
sm.execute()
- print("Raising Left Hand Detected:", sm.userdata.gesture_detected)
+ gesture_state = PlayMotion(motion_name=sm.userdata.gesture_detected)
+ gesture_sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+ with gesture_sm:
+ smach.StateMachine.add(
+ "GESTURE_STATE",
+ gesture_state,
+ transitions={
+ "succeeded": "succeeded",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ gesture_sm.execute()
rospy.spin()
diff --git a/skills/src/lasr_skills/detect_pointing.py b/skills/src/lasr_skills/detect_pointing.py
deleted file mode 100644
index 21895dfc1..000000000
--- a/skills/src/lasr_skills/detect_pointing.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import rospy
-import smach
-from sensor_msgs.msg import Image
-
-from lasr_vision_msgs.srv import PointingDirection
-
-
-class DetectPointingDirection(smach.State):
- def __init__(self, image_topic: str = "/xtion/rgb/image_raw"):
- smach.State.__init__(
- self, outcomes=["succeeded", "failed"], output_keys=["pointing_direction"]
- )
- self._image_topic = image_topic
- self._pointing_service = rospy.ServiceProxy(
- "/pointing_detection_service", PointingDirection
- )
- self._pointing_service.wait_for_service()
-
- def execute(self, userdata):
-
- img_msg = rospy.wait_for_message(self._image_topic, Image)
- resp = self._pointing_service(img_msg)
- if resp.direction == "NONE" or resp.direction == "FORWARDS":
- return "failed"
- userdata.pointing_direction = resp.direction
- return "succeeded"
diff --git a/skills/src/lasr_skills/find_gesture_person.py b/skills/src/lasr_skills/find_gesture_person.py
new file mode 100644
index 000000000..ae195494a
--- /dev/null
+++ b/skills/src/lasr_skills/find_gesture_person.py
@@ -0,0 +1,221 @@
+import smach
+import rospy
+
+from lasr_skills import Detect3D, GoToLocation, DetectGesture, PlayMotion
+from lasr_skills.vision import GetImage
+import navigation_helpers
+
+from geometry_msgs.msg import Pose, PoseWithCovarianceStamped, Point, Quaternion
+
+
+from typing import List, Union
+
+
+class FindGesturePerson(smach.StateMachine):
+ class GetLocation(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["location_index", "waypoints"],
+ output_keys=["location"],
+ )
+
+ def execute(self, userdata):
+ userdata.location = userdata.waypoints[userdata.location_index]
+ return "succeeded"
+
+ class GetPose(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ output_keys=["current_pose"],
+ )
+
+ def execute(self, userdata):
+ userdata.current_pose = rospy.wait_for_message(
+ "/amcl_pose", PoseWithCovarianceStamped
+ ).pose.pose
+ return "succeeded"
+
+ class ComputePath(smach.State):
+ def __init__(self, waypoints: List[Pose]):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["current_pose"],
+ output_keys=["waypoints"],
+ )
+ self._waypoints = waypoints
+
+ def execute(self, userdata):
+ userdata.waypoints = navigation_helpers.min_hamiltonian_path(
+ userdata.current_pose, self._waypoints
+ )
+ return "succeeded"
+
+ class HandleDetections(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["detections_3d"],
+ output_keys=["person_point"],
+ )
+
+ def execute(self, userdata):
+ if len(userdata.detections_3d.detected_objects) == 0:
+ rospy.logwarn("No person detected, returning failed.")
+ return "failed"
+ userdata.person_point = userdata.detections_3d.detected_objects[0].point
+ return "succeeded"
+
+ class HandleResponse(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["gesture_detected"],
+ )
+
+ def execute(self, userdata):
+ if userdata.gesture_detected:
+ return "succeeded"
+ return "failed"
+
+ def __init__(
+ self,
+ gesture: str,
+ waypoints: Union[List[Pose], None] = None,
+ location_param: Union[str, None] = None,
+ ):
+ smach.StateMachine.__init__(
+ self, outcomes=["succeeded", "failed"], output_keys=["person_point"]
+ )
+ if waypoints is None and location_param is None:
+ raise ValueError("Either waypoints or location_param must be provided")
+
+ if gesture == "person raising their left arm":
+ self.gesture = "raising_left_arm"
+ elif gesture == "person raising their right arm":
+ self.gesture = "raising_right_arm"
+ elif gesture == "person pointing to the right":
+            self.gesture = "pointing_to_the_right"
+        elif gesture == "person pointing to the left":
+            self.gesture = "pointing_to_the_left"
+ else:
+ raise ValueError(f"Gesture {gesture} not recognized")
+
+ if waypoints is None:
+ waypoints_to_iterate: List[Pose] = []
+
+ room = rospy.get_param(location_param)
+ beacons = room["beacons"]
+ for beacon in beacons:
+ waypoint: Pose = Pose(
+ position=Point(**beacons[beacon]["near_pose"]["position"]),
+ orientation=Quaternion(
+ **beacons[beacon]["near_pose"]["orientation"]
+ ),
+ )
+ waypoints_to_iterate.append(waypoint)
+ else:
+ waypoints_to_iterate: List[Pose] = waypoints
+
+ with self:
+ smach.StateMachine.add(
+ "GET_POSE",
+ self.GetPose(),
+ transitions={"succeeded": "COMPUTE_PATH", "failed": "failed"},
+ )
+
+ smach.StateMachine.add(
+ "COMPUTE_PATH",
+ self.ComputePath(waypoints_to_iterate),
+ transitions={"succeeded": "WAYPOINT_ITERATOR", "failed": "failed"},
+ )
+
+ waypoint_iterator = smach.Iterator(
+ outcomes=["succeeded", "failed"],
+ it=lambda: range(len(waypoints_to_iterate)),
+ it_label="location_index",
+ input_keys=["waypoints"],
+ output_keys=["person_point"],
+ exhausted_outcome="failed",
+ )
+
+ with waypoint_iterator:
+ container_sm = smach.StateMachine(
+ outcomes=["succeeded", "failed", "continue"],
+ input_keys=["location_index", "waypoints"],
+ output_keys=["person_point"],
+ )
+
+ with container_sm:
+ smach.StateMachine.add(
+ "GET_LOCATION",
+ self.GetLocation(),
+ transitions={"succeeded": "GO_TO_LOCATION", "failed": "failed"},
+ )
+
+ smach.StateMachine.add(
+ "GO_TO_LOCATION",
+ GoToLocation(),
+ transitions={
+ "succeeded": "DETECT3D",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "DETECT3D",
+ Detect3D(filter=["person"]),
+ transitions={
+ "succeeded": "HANDLE_DETECTIONS",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_DETECTIONS",
+ self.HandleDetections(),
+ transitions={
+ "succeeded": "PLAY_MOTION",
+ "failed": "continue",
+ },
+                )
+ smach.StateMachine.add(
+ "PLAY_MOTION",
+ PlayMotion(motion_name="raise_torso"),
+ transitions={
+ "succeeded": "GET_IMAGE",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+                )
+ smach.StateMachine.add(
+ "GET_IMAGE",
+ GetImage(),
+ transitions={"succeeded": "CHECK_GESTURE", "failed": "failed"},
+ )
+ smach.StateMachine.add(
+ "CHECK_GESTURE",
+ DetectGesture(self.gesture),
+ transitions={
+ "succeeded": "HANDLE_RESPONSE",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_RESPONSE",
+ self.HandleResponse(),
+ transitions={"succeeded": "succeeded", "failed": "continue"},
+ )
+
+ waypoint_iterator.set_contained_state(
+ "CONTAINER_STATE", container_sm, loop_outcomes=["continue"]
+ )
+ smach.StateMachine.add(
+ "WAYPOINT_ITERATOR",
+ waypoint_iterator,
+ {"succeeded": "succeeded", "failed": "failed"},
+ )
diff --git a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
index 39cd67ce5..00c743c20 100644
--- a/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
+++ b/tasks/carry_my_luggage/src/carry_my_luggage/state_machine.py
@@ -4,7 +4,7 @@
WaitForPerson,
DetectPointingDirection,
Say,
- PlayMotion,
+ DetectGesture,
ReceiveObject,
)
@@ -13,6 +13,24 @@
class CarryMyLuggage(smach.StateMachine):
+ class ProcessPointingDirection(smach.State):
+ def __init__(self):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["gesture_detected"],
+ output_keys=["pointing_direction"],
+ )
+
+ def execute(self, userdata):
+ if userdata.gesture_detected == "pointing_to_the_left":
+ userdata.pointing_direction = "left"
+ elif userdata.gesture_detected == "pointing_to_the_right":
+ userdata.pointing_direction = "right"
+ else:
+ return "failed"
+ return "succeeded"
+
def __init__(self):
smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"])
@@ -28,7 +46,16 @@ def __init__(self):
smach.StateMachine.add(
"DETECT_POINTING_DIRECTION",
- DetectPointingDirection(),
+ DetectGesture(),
+ transitions={
+ "succeeded": "PROCESS_POINTING_DIRECTION",
+ "failed": "SAY_FAILED_POINTING",
+ },
+ )
+
+ smach.StateMachine.add(
+ "PROCESS_POINTING_DIRECTION",
+ CarryMyLuggage.ProcessPointingDirection(),
transitions={
"succeeded": "SAY_BAG",
"failed": "SAY_FAILED_POINTING",
diff --git a/tasks/gpsr/config/lab.yaml b/tasks/gpsr/config/lab.yaml
index 96cd8cd52..f62e2c4c8 100644
--- a/tasks/gpsr/config/lab.yaml
+++ b/tasks/gpsr/config/lab.yaml
@@ -9,8 +9,8 @@ arena:
beacons:
tv_desk:
near_pose:
- position: {x: 0.96, y: 0.82, z: 0.0}
- orientation: {x: 0.0, y: 0.0, z: 0.76, w: 0.65}
+ position: {x: 0.989603867147269, y: 0.47950265241289325, z: 0.0}
+ orientation: {x: 0.0, y: 0.0, z: 0.738945375913055, w: 0.6737653385391044}
near_polygon: [[-0.11, 0.00], [-0.08, 2.36], [1.84, 2.75], [2.02, 1.31]]
search_pose:
position: {x: 0.97, y: 1.60, z: 0.0}
@@ -18,8 +18,8 @@ arena:
search_polygon: [[0.40, 1.92], [0.31, 2.45], [1.36, 2.70], [1.456, 2.17]]
window_desk:
near_pose:
- position: {x: 2.38, y: -3.0, z: 0.0}
- orientation: {x: 0.0, y: 0.0, z: -0.62, w: 0.79}
+ position: {x: 2.6771759491107954, y: -2.2998999761206105, z: 0.0}
+ orientation: {x: 0.0, y: 0.0, z: -0.6148810103509047, w: 0.7886198977389871}
near_polygon: [[3.34, -3.27], [3.75, -4.80], [2.47, -5.20], [1.75, -3.58]]
search_pose:
position: {x: 2.47, y: -3.73, z: 0.0}
diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py
index 0208686dd..2e005919b 100644
--- a/tasks/gpsr/src/gpsr/state_machine_factory.py
+++ b/tasks/gpsr/src/gpsr/state_machine_factory.py
@@ -2,7 +2,7 @@
import rospy
import smach
from typing import Dict, List
-from lasr_skills import GoToLocation, FindNamedPerson # type: ignore
+from lasr_skills import GoToLocation, FindNamedPerson, FindGesturePerson # type: ignore
from gpsr.states import Talk
@@ -43,11 +43,25 @@ def _handle_find_command(self, command_param: Dict):
def _handle_talk_command(self, command_param: Dict):
if "gesture" in command_param:
- raise NotImplementedError("Talk command with gesture not implemented")
- elif "text" in command_param:
+ if not "location" in command_param:
+ raise ValueError(
+ "Talk command with gesture but no room in command parameters"
+ )
+ location_param_room = f"/gpsr/arena/rooms/{command_param['location']}"
+ self.sm.add(
+ f"STATE_{self._increment_state_count()}",
+ FindGesturePerson(
+ location_param=location_param_room, gesture=command_param["gesture"]
+ ),
+ transitions={
+ "succeeded": f"STATE_{self.state_count + 1}",
+ "failed": "failed",
+ },
+ )
+ if "talk" in command_param:
self.sm.add(
f"STATE_{self._increment_state_count()}",
- Talk(text=command_param["text"]),
+ Talk(talk_phrase=command_param["talk"]),
transitions={
"succeeded": f"STATE_{self.state_count + 1}",
"failed": "failed",
@@ -143,8 +157,8 @@ def build_state_machine(self) -> smach.StateMachine:
raise ValueError(f"Invalid command verb: {command_verb}")
self.sm.add(
- f"STATE_{self.state_count}",
- Talk(text="I have completed the task."),
+ f"STATE_{self.state_count+1}",
+ Talk(talk_phrase="I have completed the task."),
transitions={"succeeded": "succeeded", "failed": "failed"},
)
From d7c3ca98609481dd80b5cb3bf8193831b6728372 Mon Sep 17 00:00:00 2001
From: fireblonde <71558254+fireblonde@users.noreply.github.com>
Date: Fri, 24 May 2024 12:30:47 +0100
Subject: [PATCH 2/3] feat: look at person (#166)
* refactor: use correct naming conventions.
* feat/refactor: introduce generic object detection skills, modularise from this.
* refactor: let the generic perception skills acquire their own sensor input (image/pcl).
* refactor/chore: imports and add LookForSeats stub.
* feat: always publish markers for debugging, when doing 3D point estimation.
* WIP: look for place to seat people.
* chore: default sample rate.
* chore: setup receptionist demo points.
* fix: sleep.
* feat: LookToPoint skill.
* chore: reorganise receptionist Rasa data and pipeline.
* feat: additional motions.
* chore: update receptionist demo.
* feat: look for chair and seat guest.
* WIP: get guest name.
* feat: be more informative.
* fix: set unknown.
* feat: announce name as well as drink.
* refactor: ignore point.
* feat: be more explicit; look at the chair and semantically describe its location.
* WIP.
* feat: add option to specify device to use for whisper.
* feat: simple transcribe microphone service.
* refactor: make transcribe and parse service use the new 'simple' transcribe audio service.
* feat: terminate audio interface.
* chore: update CMakeLists.
* refactor: acquire microphone when initialising service.
* chore: update launch file.
* fix: remove call to redundant method.
* fix: error handling.
* feat/fix: pre-load whisper model by prompting it, adjust for ambient noise at the beginning.
* chore: configuration required for deploying docs
* docs: update container documentation
* ci: first draft of documentation build through actions
* ci: remove submodules
* ci: use ubuntu 20.04
* ci: try using ros:noetic container image
* ci: catkin tools isn't available in container
* ci: remove extraneous source line
* ci: try the one-liner ROS installer
* ci: use the official ROS noetic installation guide
* ci: install catkin tools
* ci: try sourcing before commands
* ci: deploy to github pages (no stage check yet)
* ci: use deploy key
* ci: only deploy if on main
* chore: update receptionist data and pipeline.
* fix: explicitly set spacy NLP model.
* fix: drop age for now, since spacy is broken(?).
* feat: point and raise_torso motions.
* feat: point to chair.
* feat: additional motions.
* fix: reset motions.
* fix: handle empty transcriptions.
* fix: enable start state.
* fix: properly reset motions
* fix(ci): use github_token for gh pages deploy.
* revert: fix(ci): use github_token for gh pages deploy.
* feat: save microphone output to audio files
* chore: update gitignore with python setup files
* fix: filename typo and add to cmake
* testtesttest
* Test
* Test
* Message Change
* fixed message writing and reading
* fixed image crop
* Updated the feature extraction node and changed the messages.
* fixed that the torso and head frames were inverted.
* Changed colour format from BGR to RGB within the detection process.
* keep the saved model
* keep the model file
* keep the saved model
* Cancel saving the images (but still cannot use cv2.imshow)
* Runnable demo
* added the hair colour distribution matching method
* retrained model is very robust so changed the threshold
* Moving the head to meet the person.
* xyz axis readable.
* (Hopefully) Runnable with 3d input.
* Speak normally.
* Try to move the head.
* ah
* At least the head moves, not looking at me though.
* Cleaned the file to have only one model appear.
* Replace the old model with the new one.
* correct the lost module.
* info update
* fixed issues in the service
* fixed a stupid typo
* runnable version for full demo
* Recover the state machine for demo.
* Added a simple loop to refresh the frame taken, should work fine.
* adding learn_face
* adding detect faces
* Add detect faces state
* editing detect phases
* Implement state for learning faces
* adding in extra states
* any changes
* adjustments when testing
* push lol
* finished merging?
* new state machine
* merging from rexy laptop
* fixed detect faces
* getting rid of lasr_speech
* swapping from base controllers to skills
* fixing state machine after merge with main
* making changes
* TEST: .idea diff
* feat: get name and drink skill & prior data yaml
* fix: missing keys for get name & drink constructor
* feat: stubs for get attribute state
* feat: wrap head manager and lowering base into GoToLocation.
* feat: state machine setup.
* test: Github tracking fixes
* feat: allow AskAndListen to take static text, a format string, or read from userdata.
* refactor: being picky about spaces.
* fix: import states in dunder init.
* feat: get name and drink, and stub for attributes.
* fix: typehint for userdata
* feat: setup host.
* fix: typehint.
* fix: refactor ParseNameAndDrink state to convert possible names and drinks to lowercase
* refactor: receptionist -> sm_merge (conflicts with python package...)
* refactor: remove redundant __init__.py
* fix: container consistency.
* fix: initialise data correctly.
* fix: lower.
* fix: uncomment.
* feat: take seating area pose and polygon as input to sm.
* feat: guide guest to seating area.
* feat: add introduce state
* feat: add introduction to state machine
* feat(WIP): find empty seat.
* fix: sneaky parenthesis stops boolifying.
* fix: correctly access guest attribute data
* fix: get introduce working.
* feat(WIP, still): find an empty seat.
* feat/refactor: SeatGuest is working and rename.
* feat: update init.py
* refactor: static motions.
* feat: seat guest in state machine.
* feat: send head back to default.
* refactor: center -> centre.
* feat: logic for second guest and pass guest id through constructors.
* fix: transition.
* refactor: drop unused method.
* Remove print statement and add TODO.
* fix: namespace.
* fix: transition and assume PlayMotion succeeded.
* test: yaml setup for testing.
* fix: transitions.
* refactor: remove redundant comment.
* fix: dumb intersection bug.
* feat: clip skill for VQA
* feat: attempt of clip for attributes
* fix: missing imports
* feat: tidied clip attributes
* refactor: cleanup.
* refactor: cleanup again.
* refactor: massive cleanup.
* refactor: remove hair colour images.
* refactor: cleanup cv2_img.
* refactor: move Vqa service message into lasr_vision_msgs.
* fix: import.
* cleanup: remove vision changes.
* refactor: cleanup clip vqa skill.
* refactor: cleanup describe people skill.
* refactor: cleanup vision msgs.
* refactor: remove shebang and unused import.
* refactor: remove unnecessary message.
* refactor: make QueryImage a ServiceState.
* refactor: make QueryImage a ServiceState.
* build: CMake and requirements.
* feat: fix the msgs for bodypix
* feat: test initial segment person
* feat: filter the poses like the masks
* chore: add pcl to img msg and keep the same timestamp
* chore: change the look-to-point skill to use a PointStamped instead (see the sketch after this list)
* chore: add the 3d image skill
* feat: working look at person
* chore: cleanup for PR
* fix: the PR comments + add head manager disable/enable
* feat: wip
* feat: working wip
* feat: look at multiple people working
* chore: prep for PR
* chore: reformat agaaaain
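
An illustrative sketch of the new cv2_pcl helper and the updated LookToPoint
interface (not part of the diff; it assumes a robot publishing
/xtion/depth_registered/points and the cv2_pcl and lasr_skills packages built;
the node name and the example point are made up):

    import rospy
    import smach
    from sensor_msgs.msg import PointCloud2
    from geometry_msgs.msg import PointStamped, Point
    from std_msgs.msg import Header
    from cv2_pcl import pcl_to_img_msg
    from lasr_skills import LookToPoint

    rospy.init_node("look_to_point_example")

    # Convert a single point cloud into an RGB image message;
    # the helper keeps the original point cloud timestamp.
    pcl = rospy.wait_for_message("/xtion/depth_registered/points", PointCloud2)
    img_msg = pcl_to_img_msg(pcl)
    rospy.loginfo(
        "image stamp matches cloud stamp: %s",
        img_msg.header.stamp == pcl.header.stamp,
    )

    # LookToPoint now reads a full PointStamped from the "pointstamped"
    # userdata key instead of a bare point assumed to be in the map frame.
    sm = smach.StateMachine(outcomes=["succeeded", "aborted", "preempted"])
    sm.userdata.pointstamped = PointStamped(
        header=Header(frame_id="map"), point=Point(1.0, 0.0, 1.2)
    )
    with sm:
        smach.StateMachine.add(
            "LOOK",
            LookToPoint(),
            transitions={
                "succeeded": "succeeded",
                "aborted": "aborted",
                "preempted": "preempted",
            },
        )
    sm.execute()
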
---------
Co-authored-by: Jared Swift
Co-authored-by: Paul Makles
Co-authored-by: m-barker
Co-authored-by: Benteng Ma
Co-authored-by: MBT <59250708+MBTMBTMBT@users.noreply.github.com>
Co-authored-by: tiago
Co-authored-by: Zoe
Co-authored-by: Aaliyah Merchant
Co-authored-by: Haiwei Luo
---
.../helpers/cv2_pcl/src/cv2_pcl/__init__.py | 11 +
.../src/lasr_vision_bodypix/bodypix.py | 1 -
.../lasr_vision_msgs/msg/BodyPixPose.msg | 1 -
skills/src/lasr_skills/detect_faces.py | 19 +-
skills/src/lasr_skills/look_at_person.py | 282 ++++++++++++++++++
skills/src/lasr_skills/look_to_point.py | 7 +-
tasks/receptionist/launch/setup.launch | 1 -
7 files changed, 309 insertions(+), 13 deletions(-)
create mode 100755 skills/src/lasr_skills/look_at_person.py
diff --git a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
index 05f3ef901..dfc27415c 100644
--- a/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
+++ b/common/helpers/cv2_pcl/src/cv2_pcl/__init__.py
@@ -2,12 +2,23 @@
from sensor_msgs.msg import PointCloud2
import ros_numpy as rnp
import cv2
+from cv2_img import cv2_img_to_msg
from typing import Tuple, Union
Mat = np.ndarray
+def pcl_to_img_msg(pcl: PointCloud2) -> Mat:
+ """
+ Convert a given PointCloud2 message to img_msg
+ """
+ # keep the same timestamp
+ cv2 = pcl_to_cv2(pcl)
+
+ return cv2_img_to_msg(cv2, pcl.header.stamp)
+
+
def pcl_to_cv2(
pcl: PointCloud2, height: Union[int, None] = None, width: Union[int, None] = None
) -> Mat:
diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
index dcd811b89..b6afa0f6a 100644
--- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
+++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py
@@ -114,7 +114,6 @@ def detect(
keypoints_msg = []
for i, keypoint in pose.keypoints.items():
-
if camel_to_snake(keypoint.part) in request.masks[0].parts:
keypoint_msg = BodyPixKeypoint()
keypoint_msg.xy = [int(keypoint.position.x), int(keypoint.position.y)]
diff --git a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
index 1ba8b9f7d..0d416eeae 100644
--- a/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
+++ b/common/vision/lasr_vision_msgs/msg/BodyPixPose.msg
@@ -1,2 +1 @@
-# keypoints
BodyPixKeypoint[] keypoints
\ No newline at end of file
diff --git a/skills/src/lasr_skills/detect_faces.py b/skills/src/lasr_skills/detect_faces.py
index c0605b3f8..55af6204a 100644
--- a/skills/src/lasr_skills/detect_faces.py
+++ b/skills/src/lasr_skills/detect_faces.py
@@ -3,21 +3,30 @@
from lasr_vision_msgs.srv import DetectFaces as DetectFacesSrv
from sensor_msgs.msg import Image
+from cv2_pcl import pcl_to_img_msg
class DetectFaces(smach.State):
-
- def __init__(self, image_topic: str = "/xtion/rgb/image_raw"):
+ def __init__(
+ self,
+ image_topic: str = "/xtion/rgb/image_raw",
+ ):
smach.State.__init__(
- self, outcomes=["succeeded", "failed"], output_keys=["detections"]
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["pcl_msg"],
+ output_keys=["detections"],
)
+ self.img_msg = None
self._image_topic = image_topic
self._detect_faces = rospy.ServiceProxy("/detect_faces", DetectFacesSrv)
def execute(self, userdata):
- img_msg = rospy.wait_for_message(self._image_topic, Image)
+ self.img_msg = pcl_to_img_msg(userdata.pcl_msg)
+ if not self.img_msg:
+ self.img_msg = rospy.wait_for_message(self._image_topic, Image)
try:
- result = self._detect_faces(img_msg)
+ result = self._detect_faces(self.img_msg)
userdata.detections = result
return "succeeded"
except rospy.ServiceException as e:
diff --git a/skills/src/lasr_skills/look_at_person.py b/skills/src/lasr_skills/look_at_person.py
new file mode 100755
index 000000000..1fc477518
--- /dev/null
+++ b/skills/src/lasr_skills/look_at_person.py
@@ -0,0 +1,282 @@
+import smach_ros
+from geometry_msgs.msg import PointStamped
+import smach
+from vision import GetPointCloud
+from lasr_vision_msgs.srv import BodyPixDetection, BodyPixDetectionRequest
+from lasr_vision_msgs.msg import BodyPixMaskRequest
+from lasr_skills import LookToPoint, DetectFaces
+import cv2
+import rospy
+from sensor_msgs.msg import Image
+from cv2_img import cv2_img_to_msg, msg_to_cv2_img
+from markers import create_and_publish_marker
+from visualization_msgs.msg import Marker
+from cv2_pcl import pcl_to_img_msg
+import ros_numpy as rnp
+import rosservice
+from smach import CBState
+from std_msgs.msg import String
+import actionlib
+from control_msgs.msg import PointHeadAction, PointHeadGoal
+from geometry_msgs.msg import Point
+
+PUBLIC_CONTAINER = False
+
+try:
+ from pal_startup_msgs.srv import (
+ StartupStart,
+ StartupStop,
+ StartupStartRequest,
+ StartupStopRequest,
+ )
+except ModuleNotFoundError:
+ PUBLIC_CONTAINER = True
+
+
+class LookAtPerson(smach.StateMachine):
+ class CheckEyes(smach.State):
+ def __init__(self, debug=True):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed", "no_detection"],
+ input_keys=["bbox_eyes", "pcl_msg", "masks", "detections"],
+ output_keys=["pointstamped"],
+ )
+ self.DEBUG = debug
+ self.img_pub = rospy.Publisher("/debug/image", Image, queue_size=1)
+ self.marker_pub = rospy.Publisher("eyes", Marker, queue_size=1)
+ self.look_at_pub = actionlib.SimpleActionClient(
+ "/head_controller/point_head_action", PointHeadAction
+ )
+
+ def execute(self, userdata):
+ rospy.sleep(1)
+ if len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) > 0:
+ return "succeeded"
+ elif (
+ len(userdata.bbox_eyes) < 1 and len(userdata.detections.detections) < 1
+ ):
+ return "no_detection"
+
+ for det in userdata.bbox_eyes:
+ left_eye = det["left_eye"]
+ right_eye = det["right_eye"]
+ eye_point = (left_eye[0] + right_eye[0]) / 2, (
+ left_eye[1] + right_eye[1]
+ ) / 2
+
+ if self.DEBUG:
+ img = msg_to_cv2_img(pcl_to_img_msg(userdata.pcl_msg))
+ cv2.circle(img, (left_eye[0], left_eye[1]), 5, (0, 255, 0), -1)
+ cv2.circle(img, (right_eye[0], right_eye[1]), 5, (0, 255, 0), -1)
+ img_msg1 = cv2_img_to_msg(img)
+ self.img_pub.publish(img_msg1)
+
+ pcl_xyz = rnp.point_cloud2.pointcloud2_to_xyz_array(
+ userdata.pcl_msg, remove_nans=False
+ )
+ eye_point_pcl = pcl_xyz[int(eye_point[1]), int(eye_point[0])]
+ if any([True for i in eye_point_pcl if i != i]):
+ eye_point_pcl = pcl_xyz[int(right_eye[1]), int(right_eye[0])]
+
+ look_at = PointStamped()
+ look_at.header = userdata.pcl_msg.header
+ look_at.point.x = eye_point_pcl[0]
+ look_at.point.y = eye_point_pcl[1]
+ look_at.point.z = eye_point_pcl[2]
+
+ if self.DEBUG:
+ create_and_publish_marker(self.marker_pub, look_at, r=0, g=1, b=0)
+
+ userdata.bbox_eyes.remove(det)
+ userdata.pointstamped = look_at
+
+ self.look_at_pub.wait_for_server()
+ goal = PointHeadGoal()
+ goal.pointing_frame = "head_2_link"
+ goal.pointing_axis = Point(1.0, 0.0, 0.0)
+ goal.max_velocity = 1.0
+ goal.target = look_at
+ self.look_at_pub.send_goal(goal)
+
+ return "succeeded"
+
+ return "failed"
+
+ @smach.cb_interface(input_keys=["poses", "detections"], output_keys=["bbox_eyes"])
+ def match_poses_and_detections(ud):
+ bbox_eyes = []
+ for pose in ud.poses:
+ for detection in ud.detections.detections:
+ temp = {
+ "bbox": detection.xywh,
+ }
+ for keypoint in pose.keypoints:
+ if (
+ keypoint.part == "leftEye"
+ and detection.xywh[0]
+ < keypoint.xy[0]
+ < detection.xywh[0] + detection.xywh[2]
+ and detection.xywh[1]
+ < keypoint.xy[1]
+ < detection.xywh[1] + detection.xywh[3]
+ ):
+ temp["left_eye"] = keypoint.xy
+ if (
+ keypoint.part == "rightEye"
+ and detection.xywh[0]
+ < keypoint.xy[0]
+ < detection.xywh[0] + detection.xywh[2]
+ and detection.xywh[1]
+ < keypoint.xy[1]
+ < detection.xywh[1] + detection.xywh[3]
+ ):
+ temp["right_eye"] = keypoint.xy
+
+ if "left_eye" in temp and "right_eye" in temp:
+ bbox_eyes.append(temp)
+
+ ud.bbox_eyes = bbox_eyes
+
+ return "succeeded"
+
+ def __init__(self):
+ super(LookAtPerson, self).__init__(
+ outcomes=["succeeded", "failed"],
+ input_keys=[],
+ output_keys=["masks", "poses", "pointstamped"],
+ )
+ self.DEBUG = rospy.get_param("/debug", True)
+ IS_SIMULATION = (
+ "/pal_startup_control/start" not in rosservice.get_service_list()
+ )
+
+ with self:
+ if not IS_SIMULATION:
+ if PUBLIC_CONTAINER:
+ rospy.logwarn(
+ "You are using a public container. The head manager will not be stopped during navigation."
+ )
+ else:
+ smach.StateMachine.add(
+ "DISABLE_HEAD_MANAGER",
+ smach_ros.ServiceState(
+ "/pal_startup_control/stop",
+ StartupStop,
+ request=StartupStopRequest("head_manager"),
+ ),
+ transitions={
+ "succeeded": "GET_IMAGE",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "GET_IMAGE",
+ GetPointCloud("/xtion/depth_registered/points"),
+ transitions={
+ "succeeded": "SEGMENT_PERSON",
+ },
+ remapping={"pcl_msg": "pcl_msg"},
+ )
+
+ eyes = BodyPixMaskRequest()
+ eyes.parts = ["left_eye", "right_eye"]
+ masks = [eyes]
+
+ smach.StateMachine.add(
+ "SEGMENT_PERSON",
+ smach_ros.ServiceState(
+ "/bodypix/detect",
+ BodyPixDetection,
+ request_cb=lambda ud, _: BodyPixDetectionRequest(
+ pcl_to_img_msg(ud.pcl_msg), "resnet50", 0.7, masks
+ ),
+ response_slots=["masks", "poses"],
+ input_keys=["pcl_msg"],
+ output_keys=["masks", "poses"],
+ ),
+ transitions={
+ "succeeded": "DETECT_FACES",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "DETECT_FACES",
+ DetectFaces(),
+ transitions={
+ "succeeded": "MATCH_POSES_AND_DETECTIONS",
+ "failed": "failed",
+ },
+ remapping={"pcl_msg": "pcl_msg", "detections": "detections"},
+ )
+
+ smach.StateMachine.add(
+ "MATCH_POSES_AND_DETECTIONS",
+ CBState(
+ self.match_poses_and_detections,
+ input_keys=["poses", "detections"],
+ output_keys=["poses"],
+ outcomes=["succeeded", "failed"],
+ ),
+ transitions={"succeeded": "CHECK_EYES", "failed": "failed"},
+ remapping={"bbox_eyes": "bbox_eyes"},
+ )
+ smach.StateMachine.add(
+ "CHECK_EYES",
+ self.CheckEyes(self.DEBUG),
+ transitions={
+ "succeeded": "LOOK_TO_POINT",
+ "failed": "failed",
+ "no_detection": "succeeded",
+ },
+ remapping={
+ "pcl_msg": "pcl_msg",
+ "bbox_eyes": "bbox_eyes",
+ "pointstamped": "pointstamped",
+ },
+ )
+
+ smach.StateMachine.add(
+ "LOOK_TO_POINT",
+ LookToPoint(),
+ transitions={
+ "succeeded": "LOOP",
+ "aborted": "failed",
+ "preempted": "failed",
+ },
+ remapping={"pointstamped": "pointstamped"},
+ )
+ smach.StateMachine.add(
+ "LOOP",
+ smach.CBState(
+ lambda ud: "succeeded" if len(ud.bbox_eyes) > 0 else "finish",
+ input_keys=["bbox_eyes"],
+ output_keys=["bbox_eyes"],
+ outcomes=["succeeded", "finish"],
+ ),
+ transitions={
+ "succeeded": "CHECK_EYES",
+ "finish": "ENABLE_HEAD_MANAGER",
+ },
+ )
+ if not IS_SIMULATION:
+ if PUBLIC_CONTAINER:
+ rospy.logwarn(
+ "You are using a public container. The head manager will not be start following navigation."
+ )
+ else:
+ smach.StateMachine.add(
+ "ENABLE_HEAD_MANAGER",
+ smach_ros.ServiceState(
+ "/pal_startup_control/start",
+ StartupStart,
+ request=StartupStartRequest("head_manager", ""),
+ ),
+ transitions={
+ "succeeded": "succeeded",
+ "preempted": "failed",
+ "aborted": "failed",
+ },
+ )
diff --git a/skills/src/lasr_skills/look_to_point.py b/skills/src/lasr_skills/look_to_point.py
index 81c633ad7..92fe2e3e1 100755
--- a/skills/src/lasr_skills/look_to_point.py
+++ b/skills/src/lasr_skills/look_to_point.py
@@ -13,10 +13,7 @@ def __init__(self):
pointing_frame="head_2_link",
pointing_axis=Point(1.0, 0.0, 0.0),
max_velocity=1.0,
- target=PointStamped(
- header=Header(frame_id="map"),
- point=ud.point,
- ),
+ target=ud.pointstamped,
),
- input_keys=["point"],
+ input_keys=["pointstamped"],
)
diff --git a/tasks/receptionist/launch/setup.launch b/tasks/receptionist/launch/setup.launch
index a8fd79ede..894406256 100644
--- a/tasks/receptionist/launch/setup.launch
+++ b/tasks/receptionist/launch/setup.launch
@@ -19,5 +19,4 @@
-
From 09bae511042c4a97b5173feb60e9efa5138bd353 Mon Sep 17 00:00:00 2001
From: Jared Swift
Date: Tue, 28 May 2024 10:06:41 +0100
Subject: [PATCH 3/3] Receptionist fixes 25 04 (#207)
* fix: boolify facial hair attribute.
* fix: drop host attributes.
* feat: use DescribePeople skill (see the sketch after this list).
* fix: update guest str generation.
* refactor: remove redundant message.
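
A small, illustrative sketch of how the reworked GetGuestAttributes consumes
the DescribePeople output (not part of the diff; the attribute keys follow the
diffs below, while the sample values and the guest id are made up):

    import json

    # DescribePeople returns, per detected person, a JSON string under
    # "features"; the guest's attributes are pulled out of it and stored
    # against the guest id, as HandleGuestAttributes does in the diff.
    def extract_attributes(people, guest_data, guest_id):
        if len(people) == 0:
            return "failed"
        guest_data[guest_id]["attributes"] = json.loads(people[0]["features"])[
            "attributes"
        ]
        return "succeeded"

    # Example input, including the now-boolean facial_hair attribute
    # (True unless the raw label is "No_Beard"):
    people = [
        {
            "features": json.dumps(
                {
                    "attributes": {
                        "hair_colour": "brown",
                        "hair_shape": "curly",
                        "facial_hair": False,
                        "glasses": True,
                        "hat": False,
                        "earrings": False,
                        "necklace": False,
                        "necktie": False,
                    }
                }
            )
        }
    ]
    guest_data = {"guest1": {"name": "John", "drink": "beer"}}
    print(extract_attributes(people, guest_data, "guest1"))  # succeeded
    print(guest_data["guest1"]["attributes"]["glasses"])     # True
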
---
.../image_with_masks_and_attributes.py | 2 +-
tasks/receptionist/scripts/main.py | 6 -
.../src/receptionist/states/get_attributes.py | 137 ++++++------------
.../src/receptionist/states/introduce.py | 12 +-
4 files changed, 54 insertions(+), 103 deletions(-)
diff --git a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
index c5df408a4..d74fdb450 100644
--- a/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
+++ b/common/vision/lasr_vision_feature_extraction/src/lasr_vision_feature_extraction/image_with_masks_and_attributes.py
@@ -195,7 +195,7 @@ def describe(self) -> str:
"hair_colour": hair_colour_str,
"hair_shape": hair_shape_str,
"male": male[0],
- "facial_hair": facial_hair[0],
+ "facial_hair": facial_hair[0] != "No_Beard",
"hat": hat[0],
"glasses": glasses[0],
"earrings": earrings[0],
diff --git a/tasks/receptionist/scripts/main.py b/tasks/receptionist/scripts/main.py
index 808952737..5e9cd28e6 100755
--- a/tasks/receptionist/scripts/main.py
+++ b/tasks/receptionist/scripts/main.py
@@ -33,12 +33,6 @@
{
"name": "John",
"drink": "beer",
- "attributes": {
- "hair_colour": "strawberry blonde",
- "glasses": False,
- "hat": True,
- "height": "tall",
- },
},
)
outcome = receptionist.execute()
diff --git a/tasks/receptionist/src/receptionist/states/get_attributes.py b/tasks/receptionist/src/receptionist/states/get_attributes.py
index 22538a267..2f0dfd3bb 100644
--- a/tasks/receptionist/src/receptionist/states/get_attributes.py
+++ b/tasks/receptionist/src/receptionist/states/get_attributes.py
@@ -1,103 +1,56 @@
-"""
-State for calling the service to get a set of guest attributes.
-Currently incomplete.
-"""
-
-import rospy
import smach
from smach import UserData
-from typing import List, Any, Dict, Union
-from lasr_vision_clip.srv import VqaRequest, VqaResponse, Vqa
+from lasr_skills import DescribePeople
+import json
+
+
+class GetGuestAttributes(smach.StateMachine):
+
+ class HandleGuestAttributes(smach.State):
+ def __init__(self, guest_id: str):
+ smach.State.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["people", "guest_data"],
+ output_keys=["guest_data"],
+ )
+ self._guest_id: str = guest_id
+
+ def execute(self, userdata: UserData) -> str:
+ if len(userdata.people) == 0:
+ return "failed"
+ userdata.guest_data[self._guest_id]["attributes"] = json.loads(
+ userdata.people[0]["features"]
+ )["attributes"]
+ return "succeeded"
-class GetGuestAttributes(smach.State):
def __init__(
self,
guest_id: str,
- attribute_service: Union[str, None] = None,
- outcomes: List[str] = ["succeeded", "failed"],
- input_keys: List[str] = ["guest_id", "guest_data"],
- output_keys: List[str] = ["guest_data"],
):
- """Calls and parses the service that gets a set of guest attributes.
-
- Args:
- attribute_service (str): Name of the service to call that returns the guest's attributes.
- """
-
- super().__init__(
- outcomes=outcomes,
- input_keys=input_keys,
- output_keys=output_keys,
+ smach.StateMachine.__init__(
+ self,
+ outcomes=["succeeded", "failed"],
+ input_keys=["guest_id", "guest_data"],
+ output_keys=["guest_data"],
)
- self._service_proxy = rospy.ServiceProxy("/clip_vqa/query_service", Vqa)
self._guest_id: str = guest_id
- self._attribute_service: Union[str, None] = attribute_service
-
- def _call_attribute_service(self):
- # TODO
- pass
-
- def _send_vqa_request(self, possible_answers: List[str]) -> str:
- request = VqaRequest()
- request.possible_answers = possible_answers
- response = self._service_proxy(request)
- return response.answer
-
- def execute(self, userdata: UserData) -> str:
- if self._attribute_service:
- attributes = self._call_attribute_service()
- else:
- glasses_answers = [
- "a person wearing glasses",
- "a person not wearing glasses",
- ]
- hat_answers = ["a person wearing a hat", "a person not wearing a hat"]
- height_answers = ["a tall person", "a short person"]
- hair_colour_answers = [
- "a person with black hair",
- "a person with blonde hair",
- "a person with brown hair",
- "a person with red hair",
- "a person with grey hair",
- "a person with white hair",
- ]
-
- glasses_response = self._send_vqa_request(glasses_answers)
- hat_response = self._send_vqa_request(hat_answers)
- height_response = self._send_vqa_request(height_answers)
- hair_colour_response = self._send_vqa_request(hair_colour_answers)
-
- if glasses_response == "a person wearing glasses":
- glasses = True
- else:
- glasses = False
- if hat_response == "a person wearing a hat":
- hat = True
- else:
- hat = False
- if height_response == "a tall person":
- height = "tall"
- else:
- height = "short"
- if hair_colour_response == "a person with black hair":
- hair_colour = "black"
- elif hair_colour_response == "a person with blonde hair":
- hair_colour = "blonde"
- elif hair_colour_response == "a person with brown hair":
- hair_colour = "brown"
- elif hair_colour_response == "a person with red hair":
- hair_colour = "red"
- elif hair_colour_response == "a person with grey hair":
- hair_colour = "grey"
-
- attributes = {
- "hair_colour": hair_colour,
- "glasses": glasses,
- "hat": hat,
- "height": height,
- }
-
- userdata.guest_data[self._guest_id]["attributes"] = attributes
- return "succeeded"
+ with self:
+ smach.StateMachine.add(
+ "GET_GUEST_ATTRIBUTES",
+ DescribePeople(),
+ transitions={
+ "succeeded": "HANDLE_GUEST_ATTRIBUTES",
+ "failed": "failed",
+ },
+ )
+ smach.StateMachine.add(
+ "HANDLE_GUEST_ATTRIBUTES",
+ self.HandleGuestAttributes(self._guest_id),
+ transitions={
+ "succeeded": "succeeded",
+ "failed": "failed",
+ },
+ )
diff --git a/tasks/receptionist/src/receptionist/states/introduce.py b/tasks/receptionist/src/receptionist/states/introduce.py
index cedb54740..77dcc96d9 100644
--- a/tasks/receptionist/src/receptionist/states/introduce.py
+++ b/tasks/receptionist/src/receptionist/states/introduce.py
@@ -27,10 +27,14 @@ def stringify_guest_data(guest_data: Dict[str, Any], guest_id: str) -> str:
guest_str = ""
- guest_str += f"{relevant_guest_data['name']}, their favourite drink is {relevant_guest_data['drink']}. "
- guest_str += f"They have {relevant_guest_data['attributes']['hair_colour']} hair, their height is {relevant_guest_data['attributes']['height']}, "
- guest_str += f"they are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, and they are "
- guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}."
+ if "attributes" in relevant_guest_data.keys():
+ guest_str += f"They have {relevant_guest_data['attributes']['hair_shape']} {relevant_guest_data['attributes']['hair_colour']} hair, and they "
+ guest_str += f"{'have facial hair' if relevant_guest_data['attributes']['facial_hair'] else 'do not have facial hair'}. "
+ guest_str += f"They are {'wearing glasses' if relevant_guest_data['attributes']['glasses'] else 'not wearing glasses'}, "
+ guest_str += f"{'wearing earrings' if relevant_guest_data['attributes']['earrings'] else 'not wearing earrings'}, and "
+ guest_str += f"{'wearing a hat' if relevant_guest_data['attributes']['hat'] else 'not wearing a hat'}. "
+ guest_str += f"They are {'wearing a necklace' if relevant_guest_data['attributes']['necklace'] else 'not wearing a necklace'}, and "
+ guest_str += f"{'wearing a necktie' if relevant_guest_data['attributes']['necktie'] else 'not wearing a necktie'}. "
return guest_str