Skip to content

Commit

Permalink
Revert "Merge pull request #71 from cpacker/pdf-support"
Browse files: browse the repository at this point in the history.
This reverts commit e3325a0, reversing
changes made to 3c4562e.
  • Loading branch information
cpacker committed Oct 21, 2023
1 parent 5b99f08 commit 82bb457
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 13 deletions.
13 changes: 1 addition & 12 deletions memgpt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import tiktoken
import glob
import sqlite3
import fitz
from tqdm import tqdm
from memgpt.openai_tools import async_get_embedding_with_backoff

Expand Down Expand Up @@ -99,12 +98,6 @@ def read_in_chunks(file_object, chunk_size):
break
yield data

def read_pdf_in_chunks(file, chunk_size):
doc = fitz.open(file)
for page in doc:
text = page.get_text()
yield text

def read_in_rows_csv(file_object, chunk_size):
csvreader = csv.reader(file_object)
header = next(csvreader)
Expand All @@ -130,11 +123,7 @@ def total_bytes(pattern):
def chunk_file(file, tkns_per_chunk=300, model='gpt-4'):
encoding = tiktoken.encoding_for_model(model)
with open(file, 'r') as f:
if file.endswith('.pdf'):
lines = [l for l in read_pdf_in_chunks(file, tkns_per_chunk*8)]
if len(lines) == 0:
print(f"Warning: {file} did not have any extractable text.")
elif file.endswith('.csv'):
if file.endswith('.csv'):
lines = [l for l in read_in_rows_csv(f, tkns_per_chunk*8)]
else:
lines = [l for l in read_in_chunks(f, tkns_per_chunk*4)]
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ geopy
numpy
openai
pybars3
pymupdf
python-dotenv
pytz
rich
Expand Down

0 comments on commit 82bb457

Please sign in to comment.