Commit

open README.md with unicode (to support Hugging Face emoji); fix various typos (#218)

(close #217, #66, #67, #69, #91, #126, #127, #145)
ronghanghu committed Aug 14, 2024
1 parent 0db838b commit 7e1596c
Showing 8 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion sam2/modeling/position_encoding.py
@@ -16,7 +16,7 @@
 class PositionEmbeddingSine(nn.Module):
     """
     This is a more standard version of the position embedding, very similar to the one
-    used by the Attention is all you need paper, generalized to work on images.
+    used by the Attention Is All You Need paper, generalized to work on images.
     """

     def __init__(
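For reference, a minimal sketch of the sine/cosine embedding this class generalizes to images; the function name, defaults, and layout below are illustrative, not the module's actual internals.

import torch

def sine_pos_embed_2d(h, w, num_pos_feats=128, temperature=10000):
    # Half of the channels encode the y position and half the x position, each
    # with interleaved sin/cos at geometrically spaced frequencies, as in the
    # "Attention Is All You Need" paper, extended to two spatial axes.
    dim_t = temperature ** (2 * (torch.arange(num_pos_feats) // 2) / num_pos_feats)
    ys = torch.arange(h, dtype=torch.float32)[:, None, None] / dim_t  # (H, 1, C/2)
    xs = torch.arange(w, dtype=torch.float32)[None, :, None] / dim_t  # (1, W, C/2)
    pos_y = torch.stack((ys[..., 0::2].sin(), ys[..., 1::2].cos()), dim=-1).flatten(-2)
    pos_x = torch.stack((xs[..., 0::2].sin(), xs[..., 1::2].cos()), dim=-1).flatten(-2)
    pos = torch.cat([pos_y.expand(h, w, -1), pos_x.expand(h, w, -1)], dim=-1)
    return pos.permute(2, 0, 1)  # (2 * num_pos_feats, H, W)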
2 changes: 1 addition & 1 deletion sam2/modeling/sam2_base.py
@@ -642,7 +642,7 @@ def _prepare_memory_conditioned_features(
             pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
             return pix_feat_with_mem

-        # Use a dummy token on the first frame (to avoid emtpy memory input to tranformer encoder)
+        # Use a dummy token on the first frame (to avoid empty memory input to transformer encoder)
         to_cat_memory = [self.no_mem_embed.expand(1, B, self.mem_dim)]
         to_cat_memory_pos_embed = [self.no_mem_pos_enc.expand(1, B, self.mem_dim)]
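As context for the fixed comment: cross-attention needs at least one key/value token, so on frames with no memory yet a single learned embedding is broadcast across the batch. A minimal sketch of that pattern (shapes follow the lines above; the standalone names are only illustrative):

import torch
import torch.nn as nn

B, mem_dim = 2, 64  # assumed batch size and memory dimension
no_mem_embed = nn.Parameter(torch.zeros(1, 1, mem_dim))  # one learned dummy token
# Expand to (seq_len=1, batch, mem_dim) so the memory transformer always has
# a non-empty memory sequence to cross-attend to on the first frame.
memory = no_mem_embed.expand(1, B, mem_dim)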
2 changes: 1 addition & 1 deletion sam2/sam2_image_predictor.py
@@ -183,7 +183,7 @@ def predict_batch(
         normalize_coords=True,
     ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
         """This function is very similar to predict(...), however it is used for batched mode, when the model is expected to generate predictions on multiple images.
-        It returns a tupele of lists of masks, ious, and low_res_masks_logits.
+        It returns a tuple of lists of masks, ious, and low_res_masks_logits.
         """
         assert self._is_batch, "This function should only be used when in batched mode"
         if not self._is_image_set:
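A hedged usage sketch of the batched mode this docstring describes; the predictor construction, images, and prompts are placeholders, not part of the diff:

import numpy as np
from sam2.sam2_image_predictor import SAM2ImagePredictor

predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
predictor.set_image_batch([image1, image2])  # list of HxWx3 uint8 arrays (placeholders)
masks_list, ious_list, low_res_logits_list = predictor.predict_batch(
    point_coords_batch=[np.array([[500, 375]]), np.array([[200, 100]])],
    point_labels_batch=[np.array([1]), np.array([1])],
)
# Each return value is a list with one entry per input image.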
6 changes: 3 additions & 3 deletions sam2/sam2_video_predictor.py
@@ -44,7 +44,7 @@ def init_state(
         offload_state_to_cpu=False,
         async_loading_frames=False,
     ):
-        """Initialize a inference state."""
+        """Initialize an inference state."""
         compute_device = self.device  # device of the model
         images, video_height, video_width = load_video_frames(
             video_path=video_path,
@@ -589,7 +589,7 @@ def propagate_in_video_preflight(self, inference_state):
         # to `propagate_in_video_preflight`).
         consolidated_frame_inds = inference_state["consolidated_frame_inds"]
         for is_cond in [False, True]:
-            # Separately consolidate conditioning and non-conditioning temp outptus
+            # Separately consolidate conditioning and non-conditioning temp outputs
             storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
             # Find all the frames that contain temporary outputs for any objects
             # (these should be the frames that have just received clicks for mask inputs
@@ -598,7 +598,7 @@
             for obj_temp_output_dict in temp_output_dict_per_obj.values():
                 temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
             consolidated_frame_inds[storage_key].update(temp_frame_inds)
-            # consolidate the temprary output across all objects on this frame
+            # consolidate the temporary output across all objects on this frame
             for frame_idx in temp_frame_inds:
                 consolidated_out = self._consolidate_temp_output_across_obj(
                     inference_state, frame_idx, is_cond=is_cond, run_mem_encoder=True
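For orientation, a hedged sketch of the workflow these two methods sit in: init_state builds the inference state, clicks add temporary per-object outputs, and propagation consolidates them across objects. Config, checkpoint, video path, and click coordinates below are placeholders.

import torch
from sam2.build_sam import build_sam2_video_predictor

predictor = build_sam2_video_predictor("sam2_hiera_l.yaml", "./checkpoints/sam2_hiera_large.pt")
with torch.inference_mode():
    state = predictor.init_state(video_path="./videos/example")  # placeholder path
    # Add a positive click on object 1 at frame 0 (stored as a temporary output)...
    predictor.add_new_points(
        inference_state=state, frame_idx=0, obj_id=1,
        points=[[210, 350]], labels=[1],
    )
    # ...then propagate; this consolidates the temp outputs across objects first.
    for frame_idx, obj_ids, mask_logits in predictor.propagate_in_video(state):
        pass  # consume per-frame masks here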
4 changes: 2 additions & 2 deletions sam2/utils/misc.py
@@ -68,7 +68,7 @@ def mask_to_box(masks: torch.Tensor):
     compute bounding box given an input mask
     Inputs:
-    - masks: [B, 1, H, W] boxes, dtype=torch.Tensor
+    - masks: [B, 1, H, W] masks, dtype=torch.Tensor
     Returns:
     - box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
@@ -120,7 +120,7 @@ def __init__(
         self.offload_video_to_cpu = offload_video_to_cpu
         self.img_mean = img_mean
         self.img_std = img_std
-        # items in `self._images` will be loaded asynchronously
+        # items in `self.images` will be loaded asynchronously
         self.images = [None] * len(img_paths)
         # catch and raise any exceptions in the async loading thread
         self.exception = None
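A minimal sketch matching the corrected mask_to_box docstring, illustrative rather than the file's actual implementation:

import torch

def mask_to_box_sketch(masks: torch.Tensor) -> torch.Tensor:
    # masks: (B, 1, H, W) bool -> box_coords: (B, 1, 4) as (x_min, y_min, x_max, y_max)
    B, _, h, w = masks.shape
    xs = torch.arange(w, device=masks.device)
    ys = torch.arange(h, device=masks.device)
    grid_xs, grid_ys = torch.meshgrid(xs, ys, indexing="xy")  # both (h, w)
    grid_xs = grid_xs[None, None].expand(B, 1, h, w)
    grid_ys = grid_ys[None, None].expand(B, 1, h, w)
    # Mask out background pixels with sentinels, then reduce over H*W.
    # (An all-empty mask yields a degenerate box in this sketch.)
    min_xs = torch.where(masks, grid_xs, w).flatten(-2).min(dim=-1).values
    max_xs = torch.where(masks, grid_xs, -1).flatten(-2).max(dim=-1).values
    min_ys = torch.where(masks, grid_ys, h).flatten(-2).min(dim=-1).values
    max_ys = torch.where(masks, grid_ys, -1).flatten(-2).max(dim=-1).values
    return torch.stack((min_xs, min_ys, max_xs, max_ys), dim=-1)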
2 changes: 1 addition & 1 deletion sav_dataset/sav_evaluator.py
@@ -72,7 +72,7 @@
 parser.add_argument(
     "--do_not_skip_first_and_last_frame",
     help="In SA-V val and test, we skip the first and the last annotated frames in evaluation. "
-    "Set this to true for evaluation on settings that doen't skip first and last frames",
+    "Set this to true for evaluation on settings that don't skip first and last frames",
     action="store_true",
 )

2 changes: 1 addition & 1 deletion sav_dataset/utils/sav_benchmark.py
@@ -183,7 +183,7 @@ def _seg2bmap(seg, width=None, height=None):

     assert not (
         width > w | height > h | abs(ar1 - ar2) > 0.01
-    ), "Can" "t convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
+    ), "Cannot convert %dx%d seg to %dx%d bmap." % (w, h, width, height)

     e = np.zeros_like(seg)
     s = np.zeros_like(seg)
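Worth noting about the old line: "Can" "t ..." is implicit adjacent-string concatenation, so the message actually rendered as "Cant ...", which is what the fix removes. A quick self-contained check:

# Adjacent string literals are joined at compile time, before the % formatting,
# so the old assertion message was missing its apostrophe.
msg = "Can" "t convert %dx%d seg to %dx%d bmap." % (10, 10, 20, 20)
assert msg == "Cant convert 10x10 seg to 20x20 bmap."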
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@
 LICENSE = "Apache 2.0"

 # Read the contents of README file
-with open("README.md", "r") as f:
+with open("README.md", "r", encoding="utf-8") as f:
     LONG_DESCRIPTION = f.read()

 # Required dependencies
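A short sketch of what the explicit encoding="utf-8" argument guards against; the file name and contents here are hypothetical:

# Without encoding="utf-8", open() uses the platform's locale encoding (e.g.
# cp1252 on many Windows setups), and reading a README containing the
# Hugging Face emoji would raise UnicodeDecodeError there.
with open("demo.md", "w", encoding="utf-8") as f:
    f.write("SAM 2 on 🤗 Hugging Face\n")  # hypothetical content
with open("demo.md", "r", encoding="utf-8") as f:
    long_description = f.read()  # decodes the emoji on every platform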
