Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Pass along model version in GenerateContentResponse #621

Merged
merged 5 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions google/generativeai/types/generation_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,16 @@ def _join_chunks(chunks: Iterable[protos.GenerateContentResponse]):
else:
usage_metadata = None

if "model_version" in chunks[-1]:
model_version = chunks[-1].model_version
else:
model_version = None

return protos.GenerateContentResponse(
candidates=_join_candidate_lists(c.candidates for c in chunks),
prompt_feedback=_join_prompt_feedbacks(c.prompt_feedback for c in chunks),
usage_metadata=usage_metadata,
model_version=model_version,
)


Expand Down Expand Up @@ -539,6 +545,10 @@ def prompt_feedback(self):
def usage_metadata(self):
return self._result.usage_metadata

# Accessor that returns the `model_version` attribute of the wrapped `self._result` object.
@property
def model_version(self):
return self._result.model_version

def __str__(self) -> str:
if self._done:
_iterator = "None"
Expand Down
144 changes: 119 additions & 25 deletions samples/rest/text_generation.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SCRIPT_DIR=$(dirname "$0")
MEDIA_DIR=$(realpath ${SCRIPT_DIR}/../../third_party)

IMG_PATH=${MEDIA_DIR}/organ.jpg
IMG_PATH2=${MEDIA_DIR}/Cajun_instruments.jpg
AUDIO_PATH=${MEDIA_DIR}/sample.mp3
VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4
PDF_PATH=${MEDIA_DIR}/test.pdf
Expand Down Expand Up @@ -38,43 +39,136 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:s

echo "[START text_gen_multimodal_one_image_prompt]"
# [START text_gen_multimodal_one_image_prompt]
# Use temporary files to hold the base64 encoded image data and the JSON payload
TEMP_B64=$(mktemp)
TEMP_JSON=$(mktemp)
# Register ONE exit handler for both files: each `trap ... EXIT` replaces the
# previously registered handler, so separate traps would leave the first file behind.
trap 'rm -f "$TEMP_B64" "$TEMP_JSON"' EXIT
# $B64FLAGS is intentionally unquoted so that multiple flags split into separate words.
base64 $B64FLAGS "$IMG_PATH" > "$TEMP_B64"

cat > "$TEMP_JSON" << EOF
{
"contents": [{
"parts":[
{"text": "Tell me about this instrument"},
{
"inline_data": {
"mime_type":"image/jpeg",
"data": "$(cat "$TEMP_B64")"
}
}
]
}]
}
EOF

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
"contents": [{
"parts":[
{"text": "Tell me about this instrument"},
{
"inline_data": {
"mime_type":"image/jpeg",
"data": "'$(base64 $B64FLAGS $IMG_PATH)'"
}
}
]
}]
}' 2> /dev/null
-d "@$TEMP_JSON" 2> /dev/null
# [END text_gen_multimodal_one_image_prompt]

echo "[START text_gen_multimodal_one_image_prompt_streaming]"
# [START text_gen_multimodal_one_image_prompt_streaming]
cat > "$TEMP_JSON" << EOF
{
"contents": [{
"parts":[
{"text": "Tell me about this instrument"},
{
"inline_data": {
"mime_type":"image/jpeg",
"data": "$(cat "$TEMP_B64")"
}
}
]
}]
}
EOF

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d '{
"contents": [{
-d "@$TEMP_JSON" 2> /dev/null
# [END text_gen_multimodal_one_image_prompt_streaming]

echo "[START text_gen_multimodal_two_image_prompt]"
# [START text_gen_multimodal_two_image_prompt]
# Base64 encode both images into temporary files
TEMP_B64_1=$(mktemp)
TEMP_B64_2=$(mktemp)
# Setting a new EXIT trap replaces the previous one, so list every temporary
# file created so far (including TEMP_B64 and TEMP_JSON from the earlier
# one-image example) to make sure none of them is left behind.
trap 'rm -f "$TEMP_B64" "$TEMP_JSON" "$TEMP_B64_1" "$TEMP_B64_2"' EXIT
# $B64FLAGS is intentionally unquoted so that multiple flags split into separate words.
base64 $B64FLAGS "$IMG_PATH" > "$TEMP_B64_1"
base64 $B64FLAGS "$IMG_PATH2" > "$TEMP_B64_2"

# Create the JSON payload using the base64 data from both images
cat > "$TEMP_JSON" << EOF
{
"contents": [{
"parts":[
{"text": "Tell me about this instrument"},
{
"inline_data": {
"mime_type":"image/jpeg",
"data": "'$(base64 $B64FLAGS $IMG_PATH)'"
}
"inline_data": {
"mime_type": "image/jpeg",
"data": "$(cat "$TEMP_B64_1")"
}
},
{
"inline_data": {
"mime_type": "image/jpeg",
"data": "$(cat "$TEMP_B64_2")"
}
},
{
"text": "Generate a list of all the objects contained in both images."
}
]
}]
}' 2> /dev/null
# [END text_gen_multimodal_one_image_prompt_streaming]
}]
}
EOF

# Make the API request using the JSON file
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d "@$TEMP_JSON" 2> /dev/null > response.json

# Display the response
cat response.json
# [END text_gen_multimodal_two_image_prompt]

echo "[START text_gen_multimodal_one_image_bounding_box_prompt]"
# [START text_gen_multimodal_one_image_bounding_box_prompt]
# Re-use TEMP_B64_2 (from the previous two-image prompt) and TEMP_JSON

# Create the JSON payload for bounding box detection
cat > "$TEMP_JSON" << EOF
{
"contents": [{
"parts":[
{
"inline_data": {
"mime_type": "image/jpeg",
"data": "$(cat "$TEMP_B64_2")"
}
},
{
"text": "Generate bounding boxes for each of the objects in this image in [y_min, x_min, y_max, x_max] format."
}
]
}]
}
EOF

# Make the API request using the JSON file
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent?key=$GOOGLE_API_KEY" \
-H 'Content-Type: application/json' \
-X POST \
-d "@$TEMP_JSON" 2> /dev/null > response.json

cat response.json
# [END text_gen_multimodal_one_image_bounding_box_prompt]

echo "[START text_gen_multimodal_audio]"
# [START text_gen_multimodal_audio]
Expand Down Expand Up @@ -184,7 +278,7 @@ DISPLAY_NAME=VIDEO
# Initial resumable request defining metadata.
# The upload url is in the response headers dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
-D upload-header.tmp \
-D "${tmp_header_file}" \
-H "X-Goog-Upload-Protocol: resumable" \
-H "X-Goog-Upload-Command: start" \
-H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
Expand Down Expand Up @@ -226,7 +320,7 @@ curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:g
-d '{
"contents": [{
"parts":[
{"text": "Please describe this file."},
{"text": "Transcribe the audio from this video, giving timestamps for salient events in the video. Also provide visual descriptions."},
{"file_data":{"mime_type": "video/mp4", "file_uri": '$file_uri'}}]
}]
}' 2> /dev/null > response.json
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_version():
release_status = "Development Status :: 5 - Production/Stable"

dependencies = [
"google-ai-generativelanguage==0.6.10",
"google-ai-generativelanguage==0.6.13",
"google-api-core",
"google-api-python-client",
"google-auth>=2.15.0", # 2.15 adds API key auth support
Expand Down
3 changes: 3 additions & 0 deletions tests/test_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,8 @@ def test_join_chunks(self):
prompt_token_count=5
)

chunks[-1].model_version = "gemini-1.5-flash-002"

result = generation_types._join_chunks(chunks)

expected = protos.GenerateContentResponse(
Expand All @@ -509,6 +511,7 @@ def test_join_chunks(self):
],
},
"usage_metadata": {"prompt_token_count": 5},
"model_version": "gemini-1.5-flash-002",
},
)

Expand Down
Loading