Add pdf samples (#484)

* Add pdf samples
  Change-Id: I835c4805081af3aa6ce26a8871a62b5c435f18bf
* Fix streaming video
  Change-Id: Iec0000da192231a7a5f97faabaeae9d3ebe64475
* format
  Change-Id: I51705e0f3b96d825952a3183bc55cbed5cb158c0
google-gemini · Jul 23, 2024 · f8b049f · f8b049f
1 parent 353dc4f
commit f8b049f
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 4 deletions.
diff --git a/samples/count_tokens.py b/samples/count_tokens.py
@@ -167,6 +167,17 @@ def test_tokens_multimodal_video_audio_file_api(self):
 
         # [END tokens_multimodal_video_audio_file_api]
 
+    def test_tokens_multimodal_pdf_file_api(self):
+        # [START tokens_multimodal_pdf_file_api]
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        sample_pdf = genai.upload_file(media / "test.pdf")
+        token_count = model.count_tokens(["Give me a summary of this document.", sample_pdf])
+        print(f"{token_count=}")  # count for the text prompt plus the PDF
+
+        response = model.generate_content(["Give me a summary of this document.", sample_pdf])
+        print(response.usage_metadata)  # usage reported for the same prompt
+        # [END tokens_multimodal_pdf_file_api]
+
     def test_tokens_cached_content(self):
         # [START tokens_cached_content]
         import time

diff --git a/samples/files.py b/samples/files.py
@@ -75,6 +75,14 @@ def test_files_create_video(self):
         print(f"{result.text=}")
         # [END files_create_video]
 
+    def test_files_create_pdf(self):
+        # [START files_create_pdf]
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        sample_pdf = genai.upload_file(media / "test.pdf")  # passed to the model below
+        response = model.generate_content(["Give me a summary of this pdf file.", sample_pdf])
+        print(response.text)  # text of the model's reply
+        # [END files_create_pdf]
+
     def test_files_list(self):
         # [START files_list]
         print("My files:")

diff --git a/samples/text_generation.py b/samples/text_generation.py
@@ -96,6 +96,17 @@ def test_text_gen_multimodal_audio(self):
         print(response.text)
         # [END text_gen_multimodal_audio]
 
+    def test_text_gen_multimodal_audio_streaming(self):
+        # [START text_gen_multimodal_audio_streaming]
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        sample_audio = genai.upload_file(media / "sample.mp3")
+        prompt = "Give me a summary of this audio file."
+        response = model.generate_content([prompt, sample_audio], stream=True)
+        for chunk in response:  # stream=True: chunks are yielded as they are generated
+            print(chunk.text)
+            print("_" * 80)
+        # [END text_gen_multimodal_audio_streaming]
+
     def test_text_gen_multimodal_video_prompt(self):
         # [START text_gen_multimodal_video_prompt]
         import time
@@ -111,20 +122,51 @@ def test_text_gen_multimodal_video_prompt(self):
             myfile = genai.get_file(myfile.name)
 
         model = genai.GenerativeModel("gemini-1.5-flash")
-        result = model.generate_content([myfile, "Describe this video clip"])
-        print(f"{result.text=}")
+        response = model.generate_content([myfile, "Describe this video clip"])
+        print(f"{response.text=}")
         # [END text_gen_multimodal_video_prompt]
 
     def test_text_gen_multimodal_video_prompt_streaming(self):
         # [START text_gen_multimodal_video_prompt_streaming]
+        import time
+
+        # Video clip (CC BY 3.0) from https://peach.blender.org/download/
+        myfile = genai.upload_file(media / "Big_Buck_Bunny.mp4")
+        print(f"{myfile=}")
+
+        # Videos need to be processed before you can use them.
+        while myfile.state.name == "PROCESSING":
+            print("processing video...")
+            time.sleep(5)
+            myfile = genai.get_file(myfile.name)
+
         model = genai.GenerativeModel("gemini-1.5-flash")
-        video = genai.upload_file(media / "Big_Buck_Bunny.mp4")
-        response = model.generate_content(["Describe this video clip.", video], stream=True)
+        # stream=True returns the reply incrementally so it can be printed chunk by chunk.
+        response = model.generate_content([myfile, "Describe this video clip"], stream=True)
         for chunk in response:
             print(chunk.text)
             print("_" * 80)
         # [END text_gen_multimodal_video_prompt_streaming]
 
+    def test_text_gen_multimodal_pdf(self):
+        # [START text_gen_multimodal_pdf]
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        sample_pdf = genai.upload_file(media / "test.pdf")  # passed to the model below
+        response = model.generate_content(["Give me a summary of this document:", sample_pdf])
+        print(f"{response.text=}")  # "=" specifier prints the expression and its repr
+        # [END text_gen_multimodal_pdf]
+
+    def test_text_gen_multimodal_pdf_streaming(self):
+        # [START text_gen_multimodal_pdf_streaming]
+        model = genai.GenerativeModel("gemini-1.5-flash")
+        sample_pdf = genai.upload_file(media / "test.pdf")
+        prompt = "Give me a summary of this document:"
+        response = model.generate_content([prompt, sample_pdf], stream=True)
+        for chunk in response:  # stream=True: chunks are yielded as they are generated
+            print(chunk.text)
+            print("_" * 80)
+        # [END text_gen_multimodal_pdf_streaming]
+
 
 if __name__ == "__main__":
     absltest.main()
diff --git a/third_party/LICENSE.txt b/third_party/LICENSE.txt
@@ -8,3 +8,6 @@
   * This is the first paragraph from Shakespeare's "spring", public domain.
 * Cajun_instruments.jpg
   * This image is from Wikimedia Commons, a public domain (https://commons.wikimedia.org/wiki/Category:Musical_instruments#/media/File:Cajun_instruments.jpg).
+* test.pdf
+  * These are the first 2 pages of https://arxiv.org/abs/2403.05530 by the Google Gemini Team.
+  * License: CC-BY 4.0
diff --git a/third_party/test.pdf b/third_party/test.pdf