Revert "Merge pull request #71 from cpacker/pdf-support"

This reverts commit e3325a0, reversing changes made to 3c4562e.
letta-ai · Oct 21, 2023 · 82bb457
1 parent 5b99f08
commit 82bb457
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 13 deletions.
diff --git a/memgpt/utils.py b/memgpt/utils.py
@@ -11,7 +11,6 @@
 import tiktoken
 import glob
 import sqlite3
-import fitz
 from tqdm import tqdm
 from memgpt.openai_tools import async_get_embedding_with_backoff
 
@@ -99,12 +98,6 @@ def read_in_chunks(file_object, chunk_size):
             break
         yield data
 
-def read_pdf_in_chunks(file, chunk_size):
-    doc = fitz.open(file)
-    for page in doc:
-        text = page.get_text()
-        yield text
-
 def read_in_rows_csv(file_object, chunk_size):
     csvreader = csv.reader(file_object)
     header = next(csvreader)
@@ -130,11 +123,7 @@ def total_bytes(pattern):
 def chunk_file(file, tkns_per_chunk=300, model='gpt-4'):
     encoding = tiktoken.encoding_for_model(model)
     with open(file, 'r') as f:
-        if file.endswith('.pdf'):
-            lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk*8)]
-            if len(lines) == 0:
-                print(f"Warning: {file} did not have any extractable text.")
-        elif file.endswith('.csv'):
+        if file.endswith('.csv'):
             lines = [l for l in read_in_rows_csv(f, tkns_per_chunk*8)]
         else:
             lines = [l for l in read_in_chunks(f, tkns_per_chunk*4)]

diff --git a/requirements.txt b/requirements.txt
@@ -6,7 +6,6 @@ geopy
 numpy
 openai
 pybars3
-pymupdf
 python-dotenv
 pytz
 rich