Skip to content

Commit

Permalink
use lazy_loader
Browse files (browse the repository at this point in the history)
  • Loading branch information
BeachWang committed Sep 9, 2024
1 parent 7711b32 commit f6e669a
Show file tree
Hide file tree
Showing 32 changed files with 35 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from collections import defaultdict, deque
from typing import Dict, Set

import lazy_loader as lazy
import numpy as np
import regex
from jsonargparse.typing import PositiveInt
from loguru import logger

from data_juicer.utils.constant import HashKeys
import lazy_loader as lazy

from ..base_op import AUTOINSTALL, OPERATORS, Deduplicator
from ..common.helper_func import split_on_whitespace
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/deduplicator/image_deduplicator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from collections import defaultdict
from typing import Dict, Set, Tuple

import lazy_loader as lazy
import numpy as np

from data_juicer.utils.constant import HashKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ..base_op import AUTOINSTALL, OPERATORS, Deduplicator
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/deduplicator/ray_image_deduplicator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import PositiveInt

import lazy_loader as lazy
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ..base_op import AUTOINSTALL, OPERATORS
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_aesthetics_filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval
from loguru import logger

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import load_data_with_context, load_image

from ...utils.model_utils import get_model, prepare_model
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_face_ratio_filter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import os

import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval
from loguru import logger

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (detect_faces, load_data_with_context,
load_image)
from data_juicer.utils.model_utils import get_model, prepare_model
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_nsfw_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_text_matching_filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval
from PIL import ImageOps

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (SpecialTokens, load_data_with_context,
load_image, remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_text_similarity_filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval
from PIL import ImageOps

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (SpecialTokens, load_data_with_context,
load_image, remove_special_tokens)
from data_juicer.utils.model_utils import get_model, prepare_model
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/image_watermark_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import load_data_with_context, load_image
from data_juicer.utils.model_utils import get_model, prepare_model

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/language_id_score_filter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import List, Tuple, Union

import lazy_loader as lazy
from jsonargparse.typing import ClosedUnitInterval
from loguru import logger

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import AUTOINSTALL, OPERATORS, Filter
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/perplexity_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
# https://huggingface.co/spaces/huggingface/text-data-filtering
# --------------------------------------------------------

import lazy_loader as lazy
from jsonargparse.typing import PositiveFloat

from data_juicer.utils.constant import Fields, InterVars, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import AUTOINSTALL, OPERATORS, Filter
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/phrase_grounding_recall_filter.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import List

import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval
from loguru import logger
from PIL import ImageOps

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (SpecialTokens, iou,
load_data_with_context, load_image,
remove_special_tokens)
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/stopwords_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
# https://huggingface.co/spaces/huggingface/text-data-filtering
# --------------------------------------------------------

import lazy_loader as lazy
from jsonargparse.typing import ClosedUnitInterval, List

from data_juicer.utils.asset_utils import ASSET_DIR, load_words_asset
from data_juicer.utils.constant import Fields, InterVars, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import AUTOINSTALL, OPERATORS, Filter
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/token_num_filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import sys

import lazy_loader as lazy
from jsonargparse.typing import PositiveInt

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.model_utils import get_model, prepare_model

from ..base_op import AUTOINSTALL, OPERATORS, Filter
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_aesthetics_filter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval, PositiveInt
from loguru import logger

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video, extract_key_frames,
extract_video_frames_uniformly,
load_data_with_context, load_video)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval, PositiveInt
from PIL import ImageOps

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (SpecialTokens, close_video,
extract_key_frames,
extract_video_frames_uniformly,
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_motion_score_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from contextlib import contextmanager
from typing import List, Optional, Sequence, Tuple, Union

import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import PositiveFloat, PositiveInt

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy

from ..base_op import AUTOINSTALL, OPERATORS, UNFORKABLE, Filter

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_nsfw_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval, PositiveInt

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video, extract_key_frames,
extract_video_frames_uniformly,
load_data_with_context, load_video)
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_ocr_area_ratio_filter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from typing import List, Union

import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval, PositiveInt

from data_juicer import cuda_device_count
from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video,
extract_video_frames_uniformly,
load_data_with_context, load_video)
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/filter/video_watermark_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import ClosedUnitInterval, PositiveInt

from data_juicer.utils.constant import Fields, StatsKeys
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video, extract_key_frames,
extract_video_frames_uniformly,
load_data_with_context, load_video)
Expand Down
3 changes: 2 additions & 1 deletion data_juicer/ops/mapper/audio_ffmpeg_wrapped_mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Dict, List, Optional

import lazy_loader as lazy

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints

from ..base_op import AUTOINSTALL, OPERATORS, Mapper
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/image_face_blur_mapper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os

import lazy_loader as lazy
from loguru import logger

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (detect_faces, load_data_with_context,
load_image)
from data_juicer.utils.model_utils import get_model, prepare_model
Expand Down
3 changes: 1 addition & 2 deletions data_juicer/ops/mapper/nlpaug_en_mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from copy import deepcopy

from loguru import logger

import lazy_loader as lazy
from loguru import logger

from ..base_op import AUTOINSTALL, OPERATORS, Mapper

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/nlpcda_zh_mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from copy import deepcopy

import lazy_loader as lazy
from loguru import logger

import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints

from ..base_op import AUTOINSTALL, OPERATORS, Mapper
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_face_blur_mapper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os

import av
import lazy_loader as lazy

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video, detect_faces,
load_data_with_context, load_video,
process_each_frame)
Expand Down
3 changes: 2 additions & 1 deletion data_juicer/ops/mapper/video_ffmpeg_wrapped_mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Dict, List, Optional

import lazy_loader as lazy

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints

from ..base_op import AUTOINSTALL, OPERATORS, Mapper
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_remove_watermark_mapper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os

import av
import lazy_loader as lazy
import numpy as np
from jsonargparse.typing import List, PositiveInt

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints
from data_juicer.utils.mm_utils import (close_video,
extract_video_frames_uniformly,
Expand Down
3 changes: 2 additions & 1 deletion data_juicer/ops/mapper/video_resize_aspect_ratio_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import os
from fractions import Fraction

import lazy_loader as lazy

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints
from data_juicer.utils.mm_utils import close_video, load_video

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_resize_resolution_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import os
import sys

import lazy_loader as lazy
from jsonargparse.typing import PositiveInt

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import transfer_filename
import lazy_loader as lazy
from data_juicer.utils.logger_utils import HiddenPrints
from data_juicer.utils.mm_utils import close_video, load_video

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_split_by_scene_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
import re
from itertools import chain

import lazy_loader as lazy
from jsonargparse.typing import NonNegativeFloat, NonNegativeInt

from data_juicer.utils.constant import Fields
from data_juicer.utils.file_utils import (add_suffix_to_filename,
transfer_filename)
import lazy_loader as lazy
from data_juicer.utils.mm_utils import SpecialTokens

from ..base_op import AUTOINSTALL, OPERATORS, Mapper
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_tagging_from_audio_mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import lazy_loader as lazy
import librosa

from data_juicer.utils.constant import Fields
import lazy_loader as lazy
from data_juicer.utils.mm_utils import extract_audio_from_video
from data_juicer.utils.model_utils import get_model, prepare_model

Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/video_tagging_from_frames_mapper.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from collections import Counter

import lazy_loader as lazy
from jsonargparse.typing import PositiveInt

from data_juicer.utils.constant import Fields
import lazy_loader as lazy
from data_juicer.utils.mm_utils import (close_video, extract_key_frames,
extract_video_frames_uniformly,
load_data_with_context, load_video)
Expand Down

0 comments on commit f6e669a

Please sign in to comment.