Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Op/video face blur mapper #253

Merged
merged 7 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion configs/config_all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ process:
keep_original_sample: true # whether to keep the original sample. If it's set to False, there will be only generated images in the final datasets and the original images will be removed. It's True in default.
caption_key: null # the key name of fields in samples to store captions for each images, the caption guide the diffusion model to produce what the image is
hf_img2seq: 'Salesforce/blip2-opt-2.7b' # model name on huggingface to generate caption if caption_key is null
- image_face_blur_mapper: # mapper to blur faces detected in images.
- image_face_blur_mapper: # blur faces detected in images
blur_type: 'gaussian' # type of blur kernel, including ['mean', 'box', 'gaussian']
radius: 2 # radius of blur kernel
- nlpaug_en_mapper: # simply augment texts in English based on the nlpaug library
Expand Down Expand Up @@ -151,6 +151,9 @@ process:
frame_num: 3 # the number of frames to be extracted uniformly from the video. Only works when frame_sampling_method is "uniform". If it's 1, only the middle frame will be extracted. If it's 2, only the first and the last frames will be extracted. If it's larger than 2, in addition to the first and the last frames, other frames will be extracted uniformly within the video duration.
horizontal_flip: false # flip frame image horizontally (left to right).
vertical_flip: false # flip frame image vertically (top to bottom).
- video_face_blur_mapper: # blur faces detected in videos
blur_type: 'gaussian' # type of blur kernel, including ['mean', 'box', 'gaussian']
radius: 2 # radius of blur kernel
- video_ffmpeg_wrapped_mapper: # simple wrapper for FFmpeg video filters
- video_remove_watermark_mapper: # Remove the watermarks in videos given regions
roi_strings: ['0,0,0.1,0.1'] # a given list of regions the watermarks locate. The format of each can be "x1, y1, x2, y2", "(x1, y1, x2, y2)", or "[x1, y1, x2, y2]".
Expand Down
5 changes: 3 additions & 2 deletions data_juicer/ops/mapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
remove_words_with_incorrect_substrings_mapper,
replace_content_mapper, sentence_split_mapper,
video_captioning_from_audio_mapper,
video_captioning_from_video_mapper, video_ffmpeg_wrapped_mapper,
video_remove_watermark_mapper, video_resize_aspect_ratio_mapper,
video_captioning_from_video_mapper, video_face_blur_mapper,
video_ffmpeg_wrapped_mapper, video_remove_watermark_mapper,
video_resize_aspect_ratio_mapper,
video_resize_resolution_mapper, video_split_by_duration_mapper,
video_split_by_key_frame_mapper, video_split_by_scene_mapper,
video_tagging_from_audio_mapper,
Expand Down
111 changes: 111 additions & 0 deletions data_juicer/ops/mapper/video_face_blur_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import av

from data_juicer.utils.availability_utils import AvailabilityChecking
from data_juicer.utils.file_utils import transfer_filename
from data_juicer.utils.mm_utils import (load_data_with_context, load_video,
pil_to_opencv, process_each_frame)

from ..base_op import OPERATORS, Mapper
from ..op_fusion import LOADED_VIDEOS

OP_NAME = 'video_face_blur_mapper'

with AvailabilityChecking(['dlib', 'Pillow'], OP_NAME):
import dlib
from PIL import ImageFilter


@OPERATORS.register_module(OP_NAME)
@LOADED_VIDEOS.register_module(OP_NAME)
class VideoFaceBlurMapper(Mapper):
    """Mapper to blur faces detected in videos.

    Faces are located frame by frame with dlib's frontal face detector
    and each detected region is blurred in place with a Pillow filter.
    A video is re-encoded to a new file only when at least one of its
    frames was actually modified; otherwise the original path is kept.
    """

    # default keyword arguments forwarded to the dlib detector call;
    # callers may override them via **kwargs
    _default_kwargs = {'upsample_num_times': 0}

    def __init__(self,
                 blur_type: str = 'gaussian',
                 radius: float = 2,
                 *args,
                 **kwargs):
        """
        Initialization method.

        :param blur_type: Type of blur kernel, including
            ['mean', 'box', 'gaussian']. Note that 'mean' uses Pillow's
            fixed-size ``ImageFilter.BLUR`` kernel, so ``radius`` is
            ignored for that type.
        :param radius: Radius of blur kernel. Must be >= 0.
        :param args: extra args
        :param kwargs: extra args
        :raises ValueError: if ``blur_type`` is unsupported or
            ``radius`` is negative.
        """
        super().__init__(*args, **kwargs)
        self._init_parameters = self.remove_extra_parameters(locals())

        if blur_type not in ['mean', 'box', 'gaussian']:
            raise ValueError(
                f'Blur_type [{blur_type}] is not supported. '
                f'Can only be one of ["mean", "box", "gaussian"]. ')
        if radius < 0:
            raise ValueError('Radius must be >= 0. ')

        if blur_type == 'mean':
            self.blur = ImageFilter.BLUR
        elif blur_type == 'box':
            self.blur = ImageFilter.BoxBlur(radius)
        else:
            self.blur = ImageFilter.GaussianBlur(radius)

        self.blur_type = blur_type
        self.radius = radius

        # keep only the detector kwargs we recognize, falling back to
        # the documented defaults for any the caller did not provide
        self.extra_kwargs = {
            k: kwargs.get(k, v)
            for k, v in self._default_kwargs.items()
        }

        # dlib's HOG-based frontal face detector (CPU only)
        self.detector = dlib.get_frontal_face_detector()

    def process(self, sample, context=False):
        """Blur faces in every video of the sample and rewrite the
        sample's video keys to point at the processed files.

        :param sample: the sample dict to process.
        :param context: whether loaded video containers are cached in
            the sample context (if so, they are not closed here).
        """
        # there is no video in this sample
        if self.video_key not in sample or not sample[self.video_key]:
            return sample

        loaded_video_keys = sample[self.video_key]
        sample, videos = load_data_with_context(sample, context,
                                                loaded_video_keys, load_video)

        processed_video_keys = {}
        for video_key in loaded_video_keys:
            # skip duplicate
            if video_key in processed_video_keys:
                continue

            video = videos[video_key]
            blurred_video_key = transfer_filename(video_key, OP_NAME,
                                                  **self._init_parameters)
            # process_each_frame returns the original path when no frame
            # was modified, so face-free videos are not re-encoded
            output_video_key = process_each_frame(video, blurred_video_key,
                                                  self._blur_face)
            processed_video_keys[video_key] = output_video_key

            if not context:
                video.close()

        sample[self.video_key] = [
            processed_video_keys[key] for key in loaded_video_keys
        ]
        return sample

    def _blur_face(self, frame):
        """Blur all faces detected in a single ``av.VideoFrame``.

        Returns the *original* frame object unchanged when no face is
        found. This matters: ``process_each_frame`` detects modification
        via object identity (``new_frame != frame``), so always building
        a fresh frame would mark every video as modified and force a
        needless re-encode.
        """
        image = frame.to_image()
        # NOTE(review): assumes pil_to_opencv yields the channel order
        # dlib expects — confirm against the helper's implementation
        img = pil_to_opencv(image)
        dets = self.detector(img, **self.extra_kwargs)
        if len(dets) == 0:
            # no face detected: hand back the untouched input frame
            return frame
        for det in dets:
            # clamp the detection box to the image bounds
            x1 = max(det.left(), 0)
            y1 = max(det.top(), 0)
            x2 = min(det.right(), image.width)
            y2 = min(det.bottom(), image.height)
            blurred_roi = image.crop((x1, y1, x2, y2)).filter(self.blur)
            image.paste(blurred_roi, (x1, y1, x2, y2))
        return av.VideoFrame.from_image(image)
5 changes: 3 additions & 2 deletions data_juicer/ops/mapper/video_remove_watermark_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,9 @@ def process(self, sample, context=False):
def process_frame_func(frame):
return self._clean_watermark(frame, watermark_mask)

process_each_frame(video, cleaned_video_key,
process_frame_func)
cleaned_video_key = process_each_frame(video,
cleaned_video_key,
process_frame_func)

loaded_video_keys[index] = cleaned_video_key

Expand Down
12 changes: 12 additions & 0 deletions data_juicer/utils/mm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import datetime
import os
import re
import shutil
from typing import List, Union

import av
Expand Down Expand Up @@ -335,6 +336,8 @@ def process_each_frame(input_video: Union[str, av.container.InputContainer],
:param frame_func: a function which inputs a frame and outputs another
frame.
"""
frame_modified = False

# open the original video
if isinstance(input_video, str):
container = av.open(input_video)
Expand Down Expand Up @@ -364,6 +367,8 @@ def process_each_frame(input_video: Union[str, av.container.InputContainer],
for packet in container.demux(input_video_stream):
for frame in packet.decode():
new_frame = frame_func(frame)
if new_frame != frame:
HYLcool marked this conversation as resolved.
Show resolved Hide resolved
frame_modified = True
# for resize cases
output_video_stream.width = new_frame.width
output_video_stream.height = new_frame.height
Expand All @@ -379,6 +384,13 @@ def process_each_frame(input_video: Union[str, av.container.InputContainer],
container.close()
output_container.close()

if frame_modified:
return output_video
else:
shutil.rmtree(output_video, ignore_errors=True)
return (input_video
if isinstance(input_video, str) else input_video.name)


def extract_key_frames(input_video: Union[str, av.container.InputContainer]):
"""
Expand Down
3 changes: 2 additions & 1 deletion docs/Operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ The operators in Data-Juicer are categorized into 5 types.
| Type | Number | Description |
|-----------------------------------|:------:|-------------------------------------------------|
| [ Formatter ]( #formatter ) | 7 | Discovers, loads, and canonicalizes source data |
| [ Mapper ]( #mapper ) | 40 | Edits and transforms samples |
| [ Mapper ]( #mapper ) | 41 | Edits and transforms samples |
| [ Filter ]( #filter ) | 36 | Filters out low-quality samples |
| [ Deduplicator ]( #deduplicator ) | 5 | Detects and removes duplicate samples |
| [ Selector ]( #selector ) | 2 | Selects top samples based on ranking |
Expand Down Expand Up @@ -80,6 +80,7 @@ All the specific operators are listed below, each featured with several capabili
| sentence_split_mapper | General | en | Splits and reorganizes sentences according to semantics |
| video_captioning_from_audio_mapper | Multimodal | - | Caption a video according to its audio streams based on Qwen-Audio model |
| video_captioning_from_video_mapper | Multimodal | - | generate samples whose captions are generated based on another model (video-blip) and sampled video frame within the original sample |
| video_face_blur_mapper | Video | - | Blur faces detected in videos |
| video_ffmpeg_wrapped_mapper | Video | - | Simple wrapper to run a FFmpeg video filter |
| video_remove_watermark_mapper | Video | - | Remove the watermarks in videos given regions |
| video_resize_aspect_ratio_mapper | Video | - | Resize video aspect ratio to a specified range |
Expand Down
3 changes: 2 additions & 1 deletion docs/Operators_ZH.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Data-Juicer 中的算子分为以下 5 种类型。
| 类型 | 数量 | 描述 |
|------------------------------------|:--:|---------------|
| [ Formatter ]( #formatter ) | 7 | 发现、加载、规范化原始数据 |
| [ Mapper ]( #mapper ) | 40 | 对数据样本进行编辑和转换 |
| [ Mapper ]( #mapper ) | 41 | 对数据样本进行编辑和转换 |
| [ Filter ]( #filter ) | 36 | 过滤低质量样本 |
| [ Deduplicator ]( #deduplicator ) | 5 | 识别、删除重复样本 |
| [ Selector ]( #selector ) | 2 | 基于排序选取高质量样本 |
Expand Down Expand Up @@ -79,6 +79,7 @@ Data-Juicer 中的算子分为以下 5 种类型。
| sentence_split_mapper | General | en | 根据语义拆分和重组句子 |
| video_captioning_from_audio_mapper | Multimodal | - | 基于 Qwen-Audio 模型根据视频的音频流为视频生成新的标题描述 |
| video_captioning_from_video_mapper | Multimodal | - | 生成样本,其标题是根据另一个辅助模型(video-blip)和原始样本中的视频中指定帧的图像。 |
| video_face_blur_mapper | Video | - | 对视频中的人脸进行模糊处理 |
| video_ffmpeg_wrapped_mapper | Video | - | 运行 FFmpeg 视频过滤器的简单封装 |
| video_remove_watermark_mapper | Video | - | 去除视频中给定区域的水印 |
| video_resize_aspect_ratio_mapper | Video | - | 将视频的宽高比调整到指定范围内 |
Expand Down
8 changes: 8 additions & 0 deletions docs/sphinx_doc/source/data_juicer.ops.mapper.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ data\_juicer.ops.mapper.image\_diffusion\_mapper
:undoc-members:
:show-inheritance:

data\_juicer.ops.mapper.image\_face\_blur\_mapper
-------------------------------------------------------------

.. automodule:: data_juicer.ops.mapper.image_face_blur_mapper
:members:
:undoc-members:
:show-inheritance:

data\_juicer.ops.mapper.nlpaug\_en\_mapper
-------------------------------------------------

Expand Down
85 changes: 85 additions & 0 deletions tests/ops/mapper/test_video_face_blur_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
import shutil
import unittest

from datasets import Dataset

from data_juicer.ops.mapper.video_face_blur_mapper import VideoFaceBlurMapper
from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase


class VideoFaceBlurMapperTest(DataJuicerTestCaseBase):
    """Tests for VideoFaceBlurMapper: one case per blur type plus a
    multi-process run. Output videos are copied aside for manual
    inspection, since blur quality cannot be asserted automatically."""

    maxDiff = None

    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                             'data')
    vid1_path = os.path.join(data_path, 'video1.mp4')
    vid4_path = os.path.join(data_path, 'video4.mp4')
    vid5_path = os.path.join(data_path, 'video5.mp4')

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # fresh directory that collects the processed videos of this run
        cls.chk_path = os.path.join(cls.data_path, cls.__name__)
        shutil.rmtree(cls.chk_path, ignore_errors=True)
        os.makedirs(cls.chk_path)

    def _video_samples(self):
        # one single-video sample per test clip
        return [{'videos': [p]}
                for p in (self.vid1_path, self.vid4_path, self.vid5_path)]

    def _run_helper(self, op, source_list, np=1):
        dataset = Dataset.from_list(source_list)
        dataset = dataset.map(op.process, num_proc=np)
        res_list = dataset.to_list()
        for source, res in zip(source_list, res_list):
            self.assertEqual(len(source[op.video_key]), len(res[op.video_key]))
            # for manual check
            for path in res[op.video_key]:
                basename = os.path.basename(path)
                dst = f'{self.chk_path}/{op.blur_type}:{op.radius}_np:{np}_{basename}'
                shutil.copy(path, dst)

    def test_gaussian_radius(self):
        op = VideoFaceBlurMapper(blur_type='gaussian', radius=10)
        self._run_helper(op, self._video_samples())

    def test_box_radius(self):
        op = VideoFaceBlurMapper(blur_type='box', radius=10)
        self._run_helper(op, self._video_samples())

    def test_mean(self):
        op = VideoFaceBlurMapper(blur_type='mean')
        self._run_helper(op, self._video_samples())

    def test_gaussian_radius_parallel(self):
        op = VideoFaceBlurMapper(blur_type='gaussian', radius=10)
        self._run_helper(op, self._video_samples(), np=3)

# allow running this test module directly, e.g. `python test_video_face_blur_mapper.py`
if __name__ == '__main__':
    unittest.main()
Loading