Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix for Image2Struct #2640

Merged
4 commits merged on May 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/helm/benchmark/metrics/vision_language/emd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ def compute_emd_recursive(
assert max_num_patches > 0
assert 0 < weight_most_frequent_color <= 1

# Convert the images to RGB first. Some images have 4 channels (RGBA)
img1_PIL = img1_PIL.convert("RGB")
img2_PIL = img2_PIL.convert("RGB")

# Resize the images so that there are not too many patches
# Try to maintain the aspect ratio and resize to a multiple of the patch size
num_patches = math.ceil(img1_PIL.size[0] / patch_size[0]) * math.ceil(img1_PIL.size[1] / patch_size[1])
Expand Down
42 changes: 23 additions & 19 deletions src/helm/benchmark/metrics/vision_language/image_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ class AnnotatedImageMetrics(Metric):

# Metric names
COMPILE_METRIC: str = "compilation_success"
BLOCK_EARTH_MOVER_SIMILARITY_NORM1: str = "block_emd_similarity_white"
BLOCK_EARTH_MOVER_SIMILARITY_NORM2: str = "block_emd_similarity_median_color"
BLOCK_EARTH_MOVER_SIMILARITY: str = "block_emd_similarity"
EARTH_MOVER_SIMILARITY = "earth_mover_similarity"
EARTH_MOVER_SIMILARITY_WHITE = "earth_mover_similarity_white"
BLOCK_EMD: str = "block_emd"
PIXEL_SIMILARITY: str = "pixel_similarity"
SIFT_SIMILARITY: str = "sift_similarity"
LPIPS_SIMILARITY: str = "lpips_similarity"
Expand Down Expand Up @@ -108,12 +108,13 @@ def __init__(self, generation_type: str, metric_names: List[str], size_handling_
metrics: List[AnnotatedMetric] = [
AnnotatedMetric(self.PIXEL_SIMILARITY, pixel_similarity, "image_np_gray"),
AnnotatedMetric(self.SIFT_SIMILARITY, sift_similarity, "image_np"),
# Raw block EMD
AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY, self.compute_block_emd_raw, "image_PIL"),
# Normalized block EMD against white
AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY_NORM1, self.compute_block_emd_white, "image_PIL"),
# Normalized block EMD against median
AnnotatedMetric(self.BLOCK_EARTH_MOVER_SIMILARITY_NORM2, self.compute_block_emd_extreme, "image_PIL"),
AnnotatedMetric(self.BLOCK_EMD, self.compute_block_emd_raw, "image_PIL"), # Raw block-EMD
AnnotatedMetric(
self.EARTH_MOVER_SIMILARITY_WHITE, self.ems_white, "image_PIL"
), # Normalized block-EMD against white
AnnotatedMetric(
self.EARTH_MOVER_SIMILARITY, self.ems, "image_PIL"
), # Normalized block-EMD against black/white
AnnotatedMetric(self.LPIPS_SIMILARITY, self.lpips_similarity, "image_PIL"),
AnnotatedMetric(self.FID_SIMILARITY, self.fid_similarity, "image_PIL"),
AnnotatedMetric(self.SSIM_SIMILARITY, self.compute_ssim, "image_np_gray"),
Expand Down Expand Up @@ -414,7 +415,7 @@ def compute_edit_sim(self, completion: str, reference: str) -> float:
result = _edit_similarity(completion_tokens, truncated_reference_tokens)
return result

def compute_block_emd_white(
def ems_white(
self,
pred_image: Image.Image,
ref_image: Image.Image,
Expand Down Expand Up @@ -455,17 +456,18 @@ def compute_denominator():

hash_dict = {
"reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
"generated_image": str(AnnotatedImageMetrics.HASH_FUNC(pred_image, hash_size=self.HASH_LENGTH)),
}
cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
cache_key_denominator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY_NORM1}", **hash_dict}
cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EMD}", **hash_dict}
cache_key_denominator = {"metric_name": "intermediate_ems_white_denominator", **hash_dict}

assert self._cache is not None
emd_raw, _ = self._cache.get(cache_key_numerator, compute_numerator)
emd_base, _ = self._cache.get(cache_key_denominator, compute_denominator)

return 1.0 - emd_raw["value"] / emd_base["value"]
return max(0, 1.0 - emd_raw["value"] / emd_base["value"])

def compute_block_emd_extreme(
def ems(
self,
pred_image: Image.Image,
ref_image: Image.Image,
Expand Down Expand Up @@ -513,9 +515,10 @@ def compute_denominator():

hash_dict = {
"reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
"generated_image": str(AnnotatedImageMetrics.HASH_FUNC(pred_image, hash_size=self.HASH_LENGTH)),
}
cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
cache_key_denominator = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY_NORM2}", **hash_dict}
cache_key_numerator = {"metric_name": f"intermediate_{self.BLOCK_EMD}", **hash_dict}
cache_key_denominator = {"metric_name": "intermediate_ems_extreme_denominator", **hash_dict}

assert self._cache is not None
emd_raw, _ = self._cache.get(cache_key_numerator, compute_numerator)
Expand Down Expand Up @@ -546,8 +549,9 @@ def compute():

hash_dict = {
"reference_image": str(AnnotatedImageMetrics.HASH_FUNC(ref_image, hash_size=self.HASH_LENGTH)),
"generated_image": str(AnnotatedImageMetrics.HASH_FUNC(pred_image, hash_size=self.HASH_LENGTH)),
}
cache_key = {"metric_name": f"intermediate_{self.BLOCK_EARTH_MOVER_SIMILARITY}", **hash_dict}
cache_key = {"metric_name": f"intermediate_{self.BLOCK_EMD}", **hash_dict}
assert self._cache is not None
emd_raw, _ = self._cache.get(cache_key, compute)

Expand All @@ -564,8 +568,8 @@ def compute_block_emd_raw_wrapper(
use_tqdm: bool = False,
):
"""Computes the block Earth Moving Distance (EMD). This attempts to
speed up EMD for images with huge areas by considering movement/transformatio
of blocks of pixels. The score is normalized against EMD against white images
speed up EMD for images with huge areas by considering
movement/transformation of blocks of pixels.
"""
emd_value = compute_emd_recursive(
pred_image,
Expand Down
6 changes: 3 additions & 3 deletions src/helm/benchmark/run_specs/vlm_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def _get_image2structure_metric_specs(
metric_names = [
AnnotatedImageMetrics.PIXEL_SIMILARITY,
AnnotatedImageMetrics.FID_SIMILARITY,
AnnotatedImageMetrics.BLOCK_EARTH_MOVER_SIMILARITY,
AnnotatedImageMetrics.BLOCK_EARTH_MOVER_SIMILARITY_NORM2,
AnnotatedImageMetrics.BLOCK_EARTH_MOVER_SIMILARITY_NORM1,
AnnotatedImageMetrics.BLOCK_EMD,
AnnotatedImageMetrics.EARTH_MOVER_SIMILARITY,
AnnotatedImageMetrics.EARTH_MOVER_SIMILARITY_WHITE,
]
if include_edit_similarity:
metric_names.append(AnnotatedImageMetrics.EDIT_SIMILARITY)
Expand Down
32 changes: 15 additions & 17 deletions src/helm/benchmark/static/schema_image2structure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,20 @@ metrics:
description: Average Levenshtein edit similarity (1 - distance normalized by length of longer sequence) between model generation and reference.

# Vision Language metrics [image]:
- name: block_emd_similarity
display_name: Block Earth Mover Similarity
short_display_name: Block EMS
description: Block Earth Mover Similarity
- name: block_emd
display_name: Block Earth Mover Distance
short_display_name: Block-EMD
description: Block Earth Mover Distance (EMD adapted to speed up calculations)
lower_is_better: true
- name: earth_mover_similarity
display_name: Earth Mover Similarity
short_display_name: EMS
description: Earth Mover Similarity
lower_is_better: false
- name: block_emd_similarity_white
display_name: Block Earth Mover Similarity (white)
short_display_name: Block EMS (white)
description: Block Earth Mover Similarity (white)
lower_is_better: false
- name: block_emd_similarity_median_color
display_name: Block Earth Mover Similarity (median)
short_display_name: Block EMS (median)
description: Block Earth Mover Similarity (median)
- name: earth_mover_similarity_white
display_name: Earth Mover Similarity (against White)
short_display_name: EMS (white)
description: Earth Mover Similarity against white image
lower_is_better: false
- name: pixel_similarity
display_name: Pixel Similarity
Expand Down Expand Up @@ -169,11 +169,9 @@ metric_groups:
split: ${main_split}
- name: fid_similarity
split: ${main_split}
- name: block_emd_similarity
split: ${main_split}
- name: block_emd_similarity_white
- name: block_emd
split: ${main_split}
- name: block_emd_similarity_median_color
- name: earth_mover_similarity
split: ${main_split}

- name: generation_text
Expand Down