From 5d315e6bd03817d3dda049f53e493fdd08cd7f13 Mon Sep 17 00:00:00 2001 From: ototadana Date: Sun, 9 Jul 2023 00:10:06 +0900 Subject: [PATCH] add correct tilt option --- scripts/entities/face.py | 99 ++++++++++++++++--- scripts/inferencers/debug_processor.py | 3 + .../inferencers/mediapipe/face_detector.py | 13 ++- scripts/inferencers/retinaface_detector.py | 20 +++- scripts/inferencers/rotate_face_processor.py | 14 +++ scripts/ui/ui_builder.py | 5 + scripts/use_cases/image_processing_util.py | 20 ++++ scripts/use_cases/image_processor.py | 2 +- scripts/use_cases/workflow_manager.py | 19 +++- 9 files changed, 175 insertions(+), 20 deletions(-) create mode 100644 scripts/inferencers/rotate_face_processor.py create mode 100644 scripts/use_cases/image_processing_util.py diff --git a/scripts/entities/face.py b/scripts/entities/face.py index a3f162b..5497b24 100644 --- a/scripts/entities/face.py +++ b/scripts/entities/face.py @@ -1,10 +1,12 @@ +import traceback + import cv2 import numpy as np from modules import images from PIL import Image from scripts.entities.option import Option -from scripts.entities.rect import Rect +from scripts.entities.rect import Point, Rect class Face: @@ -21,16 +23,29 @@ def __init__(self, entire_image: np.ndarray, face_area: Rect, face_margin: float self.height = self.bottom - self.top self.image = self.__crop_face_image(entire_image, face_size, upscaler) - self.face_area_on_image = self.__get_face_area_on_image(face_size) - - def __get_face_area_on_image(self, face_size: int): - scaleFactor = face_size / self.width - return ( - int((self.face_area.left - self.left) * scaleFactor), - int((self.face_area.top - self.top) * scaleFactor), - int((self.face_area.right - self.left) * scaleFactor), - int((self.face_area.bottom - self.top) * scaleFactor), - ) + self.face_size = face_size + self.scale_factor = face_size / self.width + self.face_area_on_image = self.__get_face_area_on_image() + self.landmarks_on_image = self.__get_landmarks_on_image() + + def __get_face_area_on_image(self): + left = int((self.face_area.left - self.left) * self.scale_factor) + top = int((self.face_area.top - self.top) * self.scale_factor) + right = int((self.face_area.right - self.left) * self.scale_factor) + bottom = int((self.face_area.bottom - self.top) * self.scale_factor) + return self.__clip_values(left, top, right, bottom) + + def __get_landmarks_on_image(self): + landmarks = [] + if self.face_area.landmarks is not None: + for landmark in self.face_area.landmarks: + landmarks.append( + Point( + int((landmark.x - self.left) * self.scale_factor), + int((landmark.y - self.top) * self.scale_factor), + ) + ) + return landmarks def __crop_face_image(self, entire_image: np.ndarray, face_size: int, upscaler: str): cropped = entire_image[self.top : self.bottom, self.left : self.right, :] @@ -66,3 +81,65 @@ def __ensure_margin(self, left: int, top: int, right: int, bottom: int, entire_i right = entire_width return left, top, right, bottom + + def get_angle(self) -> float: + landmarks = getattr(self.face_area, "landmarks", None) + if landmarks is None: + return 0 + + eye1 = getattr(landmarks, "eye1", None) + eye2 = getattr(landmarks, "eye2", None) + if eye2 is None or eye1 is None: + return 0 + + try: + dx = eye2.x - eye1.x + dy = eye2.y - eye1.y + angle = np.arctan(dy / dx) * 180 / np.pi + + if dx < 0: + angle = angle = (angle + 180) % 360 + return angle + except Exception: + print(traceback.format_exc()) + return 0 + + def rotate_face_area_on_image(self, angle: float): + center = [ + (self.face_area_on_image[0] + self.face_area_on_image[2]) / 2, + (self.face_area_on_image[1] + self.face_area_on_image[3]) / 2, + ] + + points = [ + [self.face_area_on_image[0], self.face_area_on_image[1]], + [self.face_area_on_image[2], self.face_area_on_image[3]], + ] + + angle = np.radians(angle) + rot_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]]) + + points = np.array(points) - center + points = np.dot(points, rot_matrix.T) + points += center + left, top, right, bottom = (int(points[0][0]), int(points[0][1]), int(points[1][0]), int(points[1][1])) + + left, right = (right, left) if left > right else (left, right) + top, bottom = (bottom, top) if top > bottom else (top, bottom) + + width, height = right - left, bottom - top + if width < height: + left, right = left - (height - width) // 2, right + (height - width) // 2 + elif height < width: + top, bottom = top - (width - height) // 2, bottom + (width - height) // 2 + return self.__clip_values(left, top, right, bottom) + + def __clip_values(self, *args): + result = [] + for val in args: + if val < 0: + result.append(0) + elif val > self.face_size: + result.append(self.face_size) + else: + result.append(val) + return tuple(result) diff --git a/scripts/inferencers/debug_processor.py b/scripts/inferencers/debug_processor.py index 675d2c3..70e9977 100644 --- a/scripts/inferencers/debug_processor.py +++ b/scripts/inferencers/debug_processor.py @@ -46,6 +46,9 @@ def process( cv2.rectangle(overlay, (0, 0), (image.shape[1], image.shape[0]), next(color_iter), -1) l, t, r, b = face.face_area_on_image cv2.rectangle(overlay, (l, t), (r, b), (0, 0, 0), 10) + if face.landmarks_on_image is not None: + for landmark in face.landmarks_on_image: + cv2.circle(overlay, (int(landmark.x), int(landmark.y)), 6, (0, 0, 0), 10) alpha = 0.3 output = cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0) return Image.fromarray(output) diff --git a/scripts/inferencers/mediapipe/face_detector.py b/scripts/inferencers/mediapipe/face_detector.py index 14ab169..0c9190f 100644 --- a/scripts/inferencers/mediapipe/face_detector.py +++ b/scripts/inferencers/mediapipe/face_detector.py @@ -4,7 +4,7 @@ import numpy as np from PIL import Image -from scripts.entities.rect import Rect +from scripts.entities.rect import Landmarks, Point, Rect from scripts.use_cases.face_detector import FaceDetector @@ -29,5 +29,14 @@ def detect_faces(self, image: Image, conf: float = 0.01, **kwargs) -> List[Rect] top = int(relative_box.ymin * height) right = int(left + (relative_box.width * width)) bottom = int(top + (relative_box.height * height)) - rects.append(Rect(left, top, right, bottom)) + + keypoints = d.location_data.relative_keypoints + + eye1 = Point(int(keypoints[0].x * width), int(keypoints[0].y * height)) + eye2 = Point(int(keypoints[1].x * width), int(keypoints[1].y * height)) + nose = Point(int(keypoints[2].x * width), int(keypoints[2].y * height)) + mouth = Point(int(keypoints[3].x * width), int(keypoints[3].y * height)) + + rects.append(Rect(left, top, right, bottom, landmarks=Landmarks(eye1, eye2, nose, mouth, mouth))) + return rects diff --git a/scripts/inferencers/retinaface_detector.py b/scripts/inferencers/retinaface_detector.py index dc6b6e5..b6b159b 100644 --- a/scripts/inferencers/retinaface_detector.py +++ b/scripts/inferencers/retinaface_detector.py @@ -5,7 +5,7 @@ from facexlib.detection import init_detection_model, retinaface from PIL import Image -from scripts.entities.rect import Rect +from scripts.entities.rect import Landmarks, Point, Rect from scripts.use_cases.face_detector import FaceDetector @@ -20,9 +20,21 @@ def name(self): def detect_faces(self, image: Image, confidence: float, **kwargs) -> List[Rect]: with torch.no_grad(): - face_boxes, _ = self.detection_model.align_multi(image, confidence) + boxes_landmarks = self.detection_model.detect_faces(image, confidence) faces = [] - for face_box in face_boxes: - faces.append(Rect.from_ndarray(face_box)) + for box_landmark in boxes_landmarks: + face_box = box_landmark[:5] + landmark = box_landmark[5:] + face = Rect.from_ndarray(face_box) + + eye1 = Point(int(landmark[0]), int(landmark[1])) + eye2 = Point(int(landmark[2]), int(landmark[3])) + nose = Point(int(landmark[4]), int(landmark[5])) + mouth2 = Point(int(landmark[6]), int(landmark[7])) + mouth1 = Point(int(landmark[8]), int(landmark[9])) + + face.landmarks = Landmarks(eye1, eye2, nose, mouth1, mouth2) + faces.append(face) + return faces diff --git a/scripts/inferencers/rotate_face_processor.py b/scripts/inferencers/rotate_face_processor.py new file mode 100644 index 0000000..4271263 --- /dev/null +++ b/scripts/inferencers/rotate_face_processor.py @@ -0,0 +1,14 @@ +from modules.processing import StableDiffusionProcessingImg2Img +from PIL import Image + +from scripts.entities.face import Face +from scripts.use_cases.face_processor import FaceProcessor +from scripts.use_cases.image_processing_util import rotate_image + + +class RotateFaceProcessor(FaceProcessor): + def name(self) -> str: + return "Rotate" + + def process(self, face: Face, p: StableDiffusionProcessingImg2Img, angle: float = 0, **kwargs) -> Image: + return rotate_image(face.image, angle) diff --git a/scripts/ui/ui_builder.py b/scripts/ui/ui_builder.py index e682e8e..b2a33f9 100644 --- a/scripts/ui/ui_builder.py +++ b/scripts/ui/ui_builder.py @@ -202,6 +202,11 @@ def on_ui_settings(): shared.OptionInfo(True, "Save original image if face detection fails", gr.Checkbox, section=section), ) + shared.opts.add_option( + "face_editor_correct_tilt", + shared.OptionInfo(False, "Adjust tilt for detected faces", gr.Checkbox, section=section), + ) + shared.opts.add_option( "face_editor_script_index", shared.OptionInfo( diff --git a/scripts/use_cases/image_processing_util.py b/scripts/use_cases/image_processing_util.py new file mode 100644 index 0000000..01832fb --- /dev/null +++ b/scripts/use_cases/image_processing_util.py @@ -0,0 +1,20 @@ +import cv2 +import numpy as np +from PIL import Image + + +def rotate_image(image: Image, angle: float) -> Image: + if angle == 0: + return image + return Image.fromarray(rotate_array(np.array(image), angle)) + + +def rotate_array(image: np.ndarray, angle: float) -> np.ndarray: + if angle == 0: + return image + + h, w = image.shape[:2] + center = (w // 2, h // 2) + + M = cv2.getRotationMatrix2D(center, angle, 1.0) + return cv2.warpAffine(image, M, (w, h)) diff --git a/scripts/use_cases/image_processor.py b/scripts/use_cases/image_processor.py index 6abe415..9f39920 100644 --- a/scripts/use_cases/image_processor.py +++ b/scripts/use_cases/image_processor.py @@ -113,7 +113,7 @@ def proc_image( if option.show_intermediate_steps: output_images.append(self.__show_detected_faces(np.copy(entire_image), faces, p)) - print(f"number of faces: {len(faces)}") + print(f"number of faces: {len(faces)}. ") if ( len(faces) == 0 and pre_proc_image is not None diff --git a/scripts/use_cases/workflow_manager.py b/scripts/use_cases/workflow_manager.py index da7626d..f04eb0b 100644 --- a/scripts/use_cases/workflow_manager.py +++ b/scripts/use_cases/workflow_manager.py @@ -2,6 +2,7 @@ import cv2 import numpy as np +from modules import shared from modules.processing import StableDiffusionProcessingImg2Img from PIL import Image @@ -10,6 +11,7 @@ from scripts.entities.option import Option from scripts.entities.rect import Rect from scripts.use_cases import registry +from scripts.use_cases.image_processing_util import rotate_array, rotate_image class WorkflowManager: @@ -32,6 +34,7 @@ def get(cls, workflow: str) -> "WorkflowManager": def __init__(self, workflow: Workflow) -> None: self.workflow = workflow + self.correct_tilt = shared.opts.data.get("face_editor_correct_tilt", False) def detect_faces(self, image: Image, option: Option) -> List[Rect]: results = [] @@ -136,7 +139,13 @@ def process(self, jobs: List[Job], face: Face, p: StableDiffusionProcessingImg2I face_processor = registry.face_processors[fp.name] params = fp.params.copy() params["strength1"] = option.strength1 - face.image = face_processor.process(face, p, **params) + + angle = face.get_angle() + face.image = rotate_image(face.image, angle) if self.correct_tilt else face.image + + image = face_processor.process(face, p, **params) + + face.image = rotate_image(image, -angle) if self.correct_tilt else image return face.image def generate_mask(self, jobs: List[Job], face_image: np.ndarray, face: Face, option: Option) -> np.ndarray: @@ -149,7 +158,13 @@ def generate_mask(self, jobs: List[Job], face_image: np.ndarray, face: Face, opt params["use_minimal_area"] = option.use_minimal_area params["affected_areas"] = option.affected_areas params["tag"] = face.face_area.tag - m = mask_generator.generate_mask(face_image, face.face_area_on_image, **params) + + angle = face.get_angle() + image = rotate_array(face_image, angle) if self.correct_tilt else face_image + face_area_on_image = face.rotate_face_area_on_image(angle) if self.correct_tilt else face.face_area_on_image + m = mask_generator.generate_mask(image, face_area_on_image, **params) + m = rotate_array(m, -angle) if self.correct_tilt else m + if mask is None: mask = m else: