add correct tilt option
ototadana committed Jul 8, 2023
1 parent 5246499 commit 5d315e6
Showing 9 changed files with 175 additions and 20 deletions.
99 changes: 88 additions & 11 deletions scripts/entities/face.py
@@ -1,10 +1,12 @@
import traceback

import cv2
import numpy as np
from modules import images
from PIL import Image

from scripts.entities.option import Option
from scripts.entities.rect import Rect
from scripts.entities.rect import Point, Rect


class Face:
@@ -21,16 +23,29 @@ def __init__(self, entire_image: np.ndarray, face_area: Rect, face_margin: float
self.height = self.bottom - self.top

self.image = self.__crop_face_image(entire_image, face_size, upscaler)
self.face_area_on_image = self.__get_face_area_on_image(face_size)

def __get_face_area_on_image(self, face_size: int):
scaleFactor = face_size / self.width
return (
int((self.face_area.left - self.left) * scaleFactor),
int((self.face_area.top - self.top) * scaleFactor),
int((self.face_area.right - self.left) * scaleFactor),
int((self.face_area.bottom - self.top) * scaleFactor),
)
self.face_size = face_size
self.scale_factor = face_size / self.width
self.face_area_on_image = self.__get_face_area_on_image()
self.landmarks_on_image = self.__get_landmarks_on_image()

def __get_face_area_on_image(self):
left = int((self.face_area.left - self.left) * self.scale_factor)
top = int((self.face_area.top - self.top) * self.scale_factor)
right = int((self.face_area.right - self.left) * self.scale_factor)
bottom = int((self.face_area.bottom - self.top) * self.scale_factor)
return self.__clip_values(left, top, right, bottom)

def __get_landmarks_on_image(self):
landmarks = []
if self.face_area.landmarks is not None:
for landmark in self.face_area.landmarks:
landmarks.append(
Point(
int((landmark.x - self.left) * self.scale_factor),
int((landmark.y - self.top) * self.scale_factor),
)
)
return landmarks

def __crop_face_image(self, entire_image: np.ndarray, face_size: int, upscaler: str):
cropped = entire_image[self.top : self.bottom, self.left : self.right, :]
@@ -66,3 +81,65 @@ def __ensure_margin(self, left: int, top: int, right: int, bottom: int, entire_i
right = entire_width

return left, top, right, bottom

def get_angle(self) -> float:
landmarks = getattr(self.face_area, "landmarks", None)
if landmarks is None:
return 0

eye1 = getattr(landmarks, "eye1", None)
eye2 = getattr(landmarks, "eye2", None)
if eye2 is None or eye1 is None:
return 0

try:
dx = eye2.x - eye1.x
dy = eye2.y - eye1.y
angle = np.arctan(dy / dx) * 180 / np.pi

if dx < 0:
angle = (angle + 180) % 360
return angle
except Exception:
print(traceback.format_exc())
return 0

def rotate_face_area_on_image(self, angle: float):
center = [
(self.face_area_on_image[0] + self.face_area_on_image[2]) / 2,
(self.face_area_on_image[1] + self.face_area_on_image[3]) / 2,
]

points = [
[self.face_area_on_image[0], self.face_area_on_image[1]],
[self.face_area_on_image[2], self.face_area_on_image[3]],
]

angle = np.radians(angle)
rot_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])

points = np.array(points) - center
points = np.dot(points, rot_matrix.T)
points += center
left, top, right, bottom = (int(points[0][0]), int(points[0][1]), int(points[1][0]), int(points[1][1]))

left, right = (right, left) if left > right else (left, right)
top, bottom = (bottom, top) if top > bottom else (top, bottom)

width, height = right - left, bottom - top
if width < height:
left, right = left - (height - width) // 2, right + (height - width) // 2
elif height < width:
top, bottom = top - (width - height) // 2, bottom + (width - height) // 2
return self.__clip_values(left, top, right, bottom)

def __clip_values(self, *args):
result = []
for val in args:
if val < 0:
result.append(0)
elif val > self.face_size:
result.append(self.face_size)
else:
result.append(val)
return tuple(result)
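
A rough standalone sketch of what the new get_angle() computes: the tilt is the angle of the line through the two eye landmarks, with the dx < 0 branch keeping the result consistent when the second eye sits to the left of the first. Bare (x, y) tuples stand in for the repository's Point class, and an explicit zero-dx guard replaces the original's try/except.

import numpy as np

def tilt_angle(eye1, eye2):
    dx, dy = eye2[0] - eye1[0], eye2[1] - eye1[1]
    if dx == 0:
        return 0.0
    angle = np.degrees(np.arctan(dy / dx))
    if dx < 0:
        angle = (angle + 180) % 360
    return angle

# Eyes 80 px apart horizontally, second eye 20 px lower: roughly a 14-degree tilt.
print(tilt_angle((100, 120), (180, 140)))  # ~14.04

rotate_face_area_on_image() then spins the face rectangle's corners around its centre by that angle, pads the result to a square, and clips it to the crop with __clip_values().
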
3 changes: 3 additions & 0 deletions scripts/inferencers/debug_processor.py
@@ -46,6 +46,9 @@ def process(
cv2.rectangle(overlay, (0, 0), (image.shape[1], image.shape[0]), next(color_iter), -1)
l, t, r, b = face.face_area_on_image
cv2.rectangle(overlay, (l, t), (r, b), (0, 0, 0), 10)
if face.landmarks_on_image is not None:
for landmark in face.landmarks_on_image:
cv2.circle(overlay, (int(landmark.x), int(landmark.y)), 6, (0, 0, 0), 10)
alpha = 0.3
output = cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0)
return Image.fromarray(output)
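
The landmark circles are drawn onto the same translucent overlay as the face rectangle, so they go through the existing cv2.addWeighted blend. That blend is just a per-pixel weighted sum; a toy check with the same alpha:

import cv2
import numpy as np

image = np.full((2, 2, 3), 200, dtype=np.uint8)    # light source image
overlay = np.zeros((2, 2, 3), dtype=np.uint8)      # black annotations
alpha = 0.3
blended = cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0)
print(blended[0, 0])  # [140 140 140] = 200 * 0.7 + 0 * 0.3
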
13 changes: 11 additions & 2 deletions scripts/inferencers/mediapipe/face_detector.py
@@ -4,7 +4,7 @@
import numpy as np
from PIL import Image

from scripts.entities.rect import Rect
from scripts.entities.rect import Landmarks, Point, Rect
from scripts.use_cases.face_detector import FaceDetector


@@ -29,5 +29,14 @@ def detect_faces(self, image: Image, conf: float = 0.01, **kwargs) -> List[Rect]
top = int(relative_box.ymin * height)
right = int(left + (relative_box.width * width))
bottom = int(top + (relative_box.height * height))
rects.append(Rect(left, top, right, bottom))

keypoints = d.location_data.relative_keypoints

eye1 = Point(int(keypoints[0].x * width), int(keypoints[0].y * height))
eye2 = Point(int(keypoints[1].x * width), int(keypoints[1].y * height))
nose = Point(int(keypoints[2].x * width), int(keypoints[2].y * height))
mouth = Point(int(keypoints[3].x * width), int(keypoints[3].y * height))

rects.append(Rect(left, top, right, bottom, landmarks=Landmarks(eye1, eye2, nose, mouth, mouth)))

return rects
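
MediaPipe's detections expose six relative keypoints (per its face-detection docs: right eye, left eye, nose tip, mouth centre, then the two ear tragions), each normalized to [0, 1], which is why only indices 0-3 are used here and the single mouth-centre point appears in both mouth slots of Landmarks. A minimal sketch of the coordinate conversion, with a hypothetical keypoint object in place of the relative_keypoints entries:

width, height = 640, 480

class RelativeKeypoint:  # stand-in for a mediapipe relative keypoint
    def __init__(self, x, y):
        self.x, self.y = x, y

kp = RelativeKeypoint(0.40, 0.35)
print(int(kp.x * width), int(kp.y * height))  # 256 168 -> pixel coordinates
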
20 changes: 16 additions & 4 deletions scripts/inferencers/retinaface_detector.py
@@ -5,7 +5,7 @@
from facexlib.detection import init_detection_model, retinaface
from PIL import Image

from scripts.entities.rect import Rect
from scripts.entities.rect import Landmarks, Point, Rect
from scripts.use_cases.face_detector import FaceDetector


@@ -20,9 +20,21 @@ def name(self):

def detect_faces(self, image: Image, confidence: float, **kwargs) -> List[Rect]:
with torch.no_grad():
face_boxes, _ = self.detection_model.align_multi(image, confidence)
boxes_landmarks = self.detection_model.detect_faces(image, confidence)

faces = []
for face_box in face_boxes:
faces.append(Rect.from_ndarray(face_box))
for box_landmark in boxes_landmarks:
face_box = box_landmark[:5]
landmark = box_landmark[5:]
face = Rect.from_ndarray(face_box)

eye1 = Point(int(landmark[0]), int(landmark[1]))
eye2 = Point(int(landmark[2]), int(landmark[3]))
nose = Point(int(landmark[4]), int(landmark[5]))
mouth2 = Point(int(landmark[6]), int(landmark[7]))
mouth1 = Point(int(landmark[8]), int(landmark[9]))

face.landmarks = Landmarks(eye1, eye2, nose, mouth1, mouth2)
faces.append(face)

return faces
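
The slicing above assumes facexlib's detect_faces() returns one row per face laid out as four box coordinates, a confidence score, and five landmark points flattened into x/y pairs (15 values in total); [:5] keeps the box plus score for Rect.from_ndarray and [5:] holds the landmarks. A sketch of unpacking one such hypothetical row:

import numpy as np

row = np.array([110, 90, 290, 330, 0.998,               # box + score
                150, 160, 245, 158, 200, 215,           # eyes, nose
                165, 270, 235, 268], dtype=np.float32)  # mouth corners

box_with_score = row[:5]
landmarks = row[5:].reshape(-1, 2)   # five (x, y) points
print(box_with_score)
print(landmarks)
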
14 changes: 14 additions & 0 deletions scripts/inferencers/rotate_face_processor.py
@@ -0,0 +1,14 @@
from modules.processing import StableDiffusionProcessingImg2Img
from PIL import Image

from scripts.entities.face import Face
from scripts.use_cases.face_processor import FaceProcessor
from scripts.use_cases.image_processing_util import rotate_image


class RotateFaceProcessor(FaceProcessor):
def name(self) -> str:
return "Rotate"

def process(self, face: Face, p: StableDiffusionProcessingImg2Img, angle: float = 0, **kwargs) -> Image:
return rotate_image(face.image, angle)
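
The new processor is a thin wrapper around rotate_image(); in particular an angle of 0 is a no-op that hands back the same PIL object. A small check, assuming the extension's repository root is on sys.path so its modules import as they do inside the webui:

from PIL import Image
from scripts.use_cases.image_processing_util import rotate_image

img = Image.new("RGB", (512, 512), "gray")
print(rotate_image(img, 0) is img)   # True: angle 0 short-circuits
rotated = rotate_image(img, 30)      # otherwise a new image, rotated about its centre
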
5 changes: 5 additions & 0 deletions scripts/ui/ui_builder.py
@@ -202,6 +202,11 @@ def on_ui_settings():
shared.OptionInfo(True, "Save original image if face detection fails", gr.Checkbox, section=section),
)

shared.opts.add_option(
"face_editor_correct_tilt",
shared.OptionInfo(False, "Adjust tilt for detected faces", gr.Checkbox, section=section),
)

shared.opts.add_option(
"face_editor_script_index",
shared.OptionInfo(
20 changes: 20 additions & 0 deletions scripts/use_cases/image_processing_util.py
@@ -0,0 +1,20 @@
import cv2
import numpy as np
from PIL import Image


def rotate_image(image: Image, angle: float) -> Image:
if angle == 0:
return image
return Image.fromarray(rotate_array(np.array(image), angle))


def rotate_array(image: np.ndarray, angle: float) -> np.ndarray:
if angle == 0:
return image

h, w = image.shape[:2]
center = (w // 2, h // 2)

M = cv2.getRotationMatrix2D(center, angle, 1.0)
return cv2.warpAffine(image, M, (w, h))
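
cv2.getRotationMatrix2D builds a 2x3 affine matrix about the crop centre; positive angles rotate counter-clockwise, and because warpAffine keeps the original (w, h) canvas, content near the corners of a strongly tilted crop can be clipped. A quick check of the matrix, and of a corner falling off the canvas, for a 90-degree turn:

import cv2
import numpy as np

w, h = 640, 480
M = cv2.getRotationMatrix2D((w // 2, h // 2), 90, 1.0)
print(np.round(M, 3))
# [[  0.   1.  80.]
#  [ -1.   0. 560.]]

corner = M @ np.array([0, 0, 1])   # where the top-left corner lands
print(corner)                      # [ 80. 560.] -> below the 480-px canvas, so it is clipped
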
2 changes: 1 addition & 1 deletion scripts/use_cases/image_processor.py
@@ -113,7 +113,7 @@ def proc_image(
if option.show_intermediate_steps:
output_images.append(self.__show_detected_faces(np.copy(entire_image), faces, p))

print(f"number of faces: {len(faces)}")
print(f"number of faces: {len(faces)}. ")
if (
len(faces) == 0
and pre_proc_image is not None
19 changes: 17 additions & 2 deletions scripts/use_cases/workflow_manager.py
@@ -2,6 +2,7 @@

import cv2
import numpy as np
from modules import shared
from modules.processing import StableDiffusionProcessingImg2Img
from PIL import Image

@@ -10,6 +11,7 @@
from scripts.entities.option import Option
from scripts.entities.rect import Rect
from scripts.use_cases import registry
from scripts.use_cases.image_processing_util import rotate_array, rotate_image


class WorkflowManager:
@@ -32,6 +34,7 @@ def get(cls, workflow: str) -> "WorkflowManager":

def __init__(self, workflow: Workflow) -> None:
self.workflow = workflow
self.correct_tilt = shared.opts.data.get("face_editor_correct_tilt", False)

def detect_faces(self, image: Image, option: Option) -> List[Rect]:
results = []
@@ -136,7 +139,13 @@ def process(self, jobs: List[Job], face: Face, p: StableDiffusionProcessingImg2I
face_processor = registry.face_processors[fp.name]
params = fp.params.copy()
params["strength1"] = option.strength1
face.image = face_processor.process(face, p, **params)

angle = face.get_angle()
face.image = rotate_image(face.image, angle) if self.correct_tilt else face.image

image = face_processor.process(face, p, **params)

face.image = rotate_image(image, -angle) if self.correct_tilt else image
return face.image

def generate_mask(self, jobs: List[Job], face_image: np.ndarray, face: Face, option: Option) -> np.ndarray:
@@ -149,7 +158,13 @@ def generate_mask(self, jobs: List[Job], face_image: np.ndarray, face: Face, opt
params["use_minimal_area"] = option.use_minimal_area
params["affected_areas"] = option.affected_areas
params["tag"] = face.face_area.tag
m = mask_generator.generate_mask(face_image, face.face_area_on_image, **params)

angle = face.get_angle()
image = rotate_array(face_image, angle) if self.correct_tilt else face_image
face_area_on_image = face.rotate_face_area_on_image(angle) if self.correct_tilt else face.face_area_on_image
m = mask_generator.generate_mask(image, face_area_on_image, **params)
m = rotate_array(m, -angle) if self.correct_tilt else m

if mask is None:
mask = m
else:
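
Taken together, the correct_tilt path in process() and generate_mask() is a rotate-in / rotate-out bracket around the existing steps: stand the crop up by get_angle(), run the processor or mask generator on the upright image, then rotate the result back so it lines up with the untouched crop. A self-contained toy run of that pattern, with a white fill standing in for the actual img2img edit:

import cv2
import numpy as np

angle = 14.0                                    # what face.get_angle() might return
crop = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in for face.image
h, w = crop.shape[:2]

def rotate(a, deg):
    M = cv2.getRotationMatrix2D((w // 2, h // 2), deg, 1.0)
    return cv2.warpAffine(a, M, (w, h))

upright = rotate(crop, angle)        # tilt correction before processing
upright[180:332, 180:332] = 255      # hypothetical processed face region
restored = rotate(upright, -angle)   # rotate the edit back to the original tilt

# generate_mask() applies the same bracket to the mask (rotate_array in, rotate back out),
# so the mask and the processed face stay aligned in the original orientation.
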
