Skip to content

Commit

Permalink
[fix] Google video text detection: textAnnotations key error
Browse files Browse the repository at this point in the history
  • Loading branch information
floflokie committed Dec 11, 2023
1 parent df2450d commit 3432144
Show file tree
Hide file tree
Showing 2 changed files with 9,754 additions and 9,742 deletions.
124 changes: 68 additions & 56 deletions edenai_apis/apis/google/google_video_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,12 @@ def video__label_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType[LabelDetectionAsyncDataClass]:
result = google_video_get_job(provider_job_id)

if result.get("done"):
annotations = result["response"]["annotationResults"][0]
label: List[dict] = annotations.get("segmentLabelAnnotations", []) + annotations.get(
"shotLabelAnnotations", []
)
label: List[dict] = annotations.get(
"segmentLabelAnnotations", []
) + annotations.get("shotLabelAnnotations", [])
label_list = []

for entity in label:
Expand Down Expand Up @@ -243,11 +243,11 @@ def video__text_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType[TextDetectionAsyncDataClass]:
result = google_video_get_job(provider_job_id)

if result.get("done"):
annotations = result["response"]["annotationResults"][0]
texts = []
for annotation in annotations["textAnnotations"]:
for annotation in annotations.get("textAnnotations", []):
frames = []
description = annotation["text"]
for segment in annotation["segments"]:
Expand Down Expand Up @@ -289,7 +289,7 @@ def video__face_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType[FaceDetectionAsyncDataClass]:
result = google_video_get_job(provider_job_id)

if result.get("done"):
faces = []
response = result["response"]["annotationResults"][0]
Expand All @@ -299,33 +299,33 @@ def video__face_detection_async__get_job_result(
timestamp = float(
track["timestampedObjects"][0]["timeOffset"][:-1]
)

top = float(
track["timestampedObjects"][0][
"normalizedBoundingBox"
].get("top", 0)
track["timestampedObjects"][0]["normalizedBoundingBox"].get(
"top", 0
)
)
left = float(
track["timestampedObjects"][0][
"normalizedBoundingBox"
].get("left", 0)
track["timestampedObjects"][0]["normalizedBoundingBox"].get(
"left", 0
)
right= float(
track["timestampedObjects"][0][
"normalizedBoundingBox"
].get("right", 0)
)
right = float(
track["timestampedObjects"][0]["normalizedBoundingBox"].get(
"right", 0
)
)
bottom = float(
track["timestampedObjects"][0][
"normalizedBoundingBox"
].get("bottom", 0)
track["timestampedObjects"][0]["normalizedBoundingBox"].get(
"bottom", 0
)
)
# Bounding box
bounding_box = VideoBoundingBox(
top=top,
left=left,
height = 1 - (top + (1 - bottom)),
width= 1 - (left + (1 - right)),
height=1 - (top + (1 - bottom)),
width=1 - (left + (1 - right)),
)
attribute_dict = {}
for attr in track["timestampedObjects"][0].get(
Expand Down Expand Up @@ -377,23 +377,31 @@ def video__person_tracking_async__get_job_result(
for track in person["tracks"]:
for time_stamped_object in track["timestampedObjects"]:
top = float(
time_stamped_object["normalizedBoundingBox"].get("top",0)
time_stamped_object["normalizedBoundingBox"].get(
"top", 0
)
)
left = float(
time_stamped_object["normalizedBoundingBox"].get("left",0)
time_stamped_object["normalizedBoundingBox"].get(
"left", 0
)
right =float(
time_stamped_object["normalizedBoundingBox"].get("right",0)
)
right = float(
time_stamped_object["normalizedBoundingBox"].get(
"right", 0
)
)
bottom = float(
time_stamped_object["normalizedBoundingBox"].get("bottom",0)
time_stamped_object["normalizedBoundingBox"].get(
"bottom", 0
)
)
# Bounding box
bounding_box = VideoTrackingBoundingBox(
top=top,
left=left,
height = 1 - (top + (1 - bottom)),
width= 1 - (left + (1 - right)),
height=1 - (top + (1 - bottom)),
width=1 - (left + (1 - right)),
)

# Timeoffset
Expand Down Expand Up @@ -433,7 +441,9 @@ def video__person_tracking_async__get_job_result(
eye_left=landmark_output.get("left_eye", []),
eye_right=landmark_output.get("right_eye", []),
shoulder_left=landmark_output.get("left_shoulder", []),
shoulder_right=landmark_output.get("right_shoulder", []),
shoulder_right=landmark_output.get(
"right_shoulder", []
),
elbow_left=landmark_output.get("left_elbow", []),
elbow_right=landmark_output.get("right_elbow", []),
wrist_left=landmark_output.get("left_wrist", []),
Expand All @@ -453,7 +463,9 @@ def video__person_tracking_async__get_job_result(
attributes=tracked_attributes,
landmarks=landmark_tracking,
bounding_box=bounding_box,
poses=VideoPersonPoses(pitch=None, roll=None, yaw=None),
poses=VideoPersonPoses(
pitch=None, roll=None, yaw=None
),
quality=VideoPersonQuality(
brightness=None, sharpness=None
),
Expand All @@ -479,7 +491,7 @@ def video__logo_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType[LogoDetectionAsyncDataClass]:
result = google_video_get_job(provider_job_id)

if result.get("done"):
response = result["response"]["annotationResults"][0]
tracks = []
Expand All @@ -491,23 +503,31 @@ def video__logo_detection_async__get_job_result(
for time_stamped_object in track["timestampedObjects"]:
timestamp = float(time_stamped_object["timeOffset"][:-1])
top = float(
time_stamped_object["normalizedBoundingBox"].get("top",0)
time_stamped_object["normalizedBoundingBox"].get(
"top", 0
)
left =float(
time_stamped_object["normalizedBoundingBox"].get("left",0)
)
left = float(
time_stamped_object["normalizedBoundingBox"].get(
"left", 0
)
right =float(
time_stamped_object["normalizedBoundingBox"].get("right",0)
)
right = float(
time_stamped_object["normalizedBoundingBox"].get(
"right", 0
)
)
bottom = float(
time_stamped_object["normalizedBoundingBox"].get("bottom",0)
time_stamped_object["normalizedBoundingBox"].get(
"bottom", 0
)
)
# Bounding box
bounding_box = VideoLogoBoundingBox(
top=top,
left=left,
height = 1 - (top + (1 - bottom)),
width= 1 - (left + (1 - right)),
height=1 - (top + (1 - bottom)),
width=1 - (left + (1 - right)),
)

objects.append(
Expand Down Expand Up @@ -546,24 +566,16 @@ def video__object_tracking_async__get_job_result(
description = detected_object["entity"]["description"]
for frame in detected_object["frames"]:
timestamp = float(frame["timeOffset"][:-1])
top = float(
frame["normalizedBoundingBox"].get("top",0)
)
left = float(
frame["normalizedBoundingBox"].get("left",0)
)
right = float(
frame["normalizedBoundingBox"].get("right",0)
)
bottom = float(
frame["normalizedBoundingBox"].get("bottom",0)
)
top = float(frame["normalizedBoundingBox"].get("top", 0))
left = float(frame["normalizedBoundingBox"].get("left", 0))
right = float(frame["normalizedBoundingBox"].get("right", 0))
bottom = float(frame["normalizedBoundingBox"].get("bottom", 0))
# Bounding box
bounding_box = VideoObjectBoundingBox(
top=top,
left=left,
height = 1 - (top + (1 - bottom)),
width= 1 - (left + (1 - right)),
height=1 - (top + (1 - bottom)),
width=1 - (left + (1 - right)),
)
frames.append(
ObjectFrame(timestamp=timestamp, bounding_box=bounding_box)
Expand Down Expand Up @@ -591,7 +603,7 @@ def video__explicit_content_detection_async__get_job_result(
self, provider_job_id: str
) -> AsyncBaseResponseType[ExplicitContentDetectionAsyncDataClass]:
result = google_video_get_job(provider_job_id)

if result.get("error"):
raise ProviderException(result["error"].get("message"))

Expand Down
Loading

0 comments on commit 3432144

Please sign in to comment.