
Commit ea01ef0: new features
AstraBert committed May 9, 2024 (1 parent: 5b69aa5)
Showing 12 changed files with 192 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .gitignore

@@ -1,4 +1,4 @@
-flagged/
 scripts/__pycache__
 docker/__pycache__
-docker/flagged
+docker/flagged
+qdrant_storage/
12 changes: 12 additions & 0 deletions README.md

@@ -57,6 +57,18 @@ Choose the task among:
 - *image-generation-pollinations*: stable diffusion, use Pollinations AI API; if you choose 'image-generation-pollinations', you do not need to specify anything else apart from the task - **MULTILINGUAL**
 - *image-classification*: classify an image, supports every image-classification model on HF Hub - **ENGLISH ONLY**
 - *image-to-text*: describe an image, supports every image-to-text model on HF Hub - **ENGLISH ONLY**
+- *retrieval-image-search*: search an image database built from a folder that you upload as the database input. The folder should have the following structure:
+
+```
+./
+├── test/
+│   ├── label1/
+│   └── label2/
+└── train/
+    ├── label1/
+    └── label2/
+```
+You can query the database starting from your own pictures.
 
 ### 6. Go to `localhost:7860` and start using your assistant
 
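The tree above is the standard Hugging Face `imagefolder` layout, which is what the new `ImageDB` class in `docker/utils.py` (diffed below) loads. A minimal sketch to sanity-check a folder before indexing it, assuming a hypothetical `./my_images` directory:

```python
# Verify that a folder follows the imagefolder layout expected by
# the retrieval-image-search task before building the image database.
from datasets import load_dataset

dataset = load_dataset("imagefolder", data_dir="./my_images", split="train")
print(dataset)               # features should include 'image' and 'label'
print(dataset[0]["label"])   # integer label derived from the subfolder name
```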
5 changes: 4 additions & 1 deletion docker/Dockerfile

@@ -7,7 +7,10 @@ WORKDIR /app
 # Add the current directory contents into the container at /app
 ADD . /app
 
+# Add new package
+RUN python3 -m pip install datasets==2.15.0
+
 # Expose the port that the application will run on
-EXPOSE 7860
+EXPOSE 8760
 
 ENTRYPOINT [ "python3", "select_and_run.py" ]
5 changes: 4 additions & 1 deletion docker/image_classification.py

@@ -1,6 +1,7 @@
 from transformers import AutoModelForImageClassification, AutoImageProcessor, pipeline
 from PIL import Image
 from argparse import ArgumentParser
+import torch
 
 argparse = ArgumentParser()
 argparse.add_argument(
@@ -18,9 +19,11 @@
 
 model_checkpoint = mod
 
-model = AutoModelForImageClassification.from_pretrained(model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForImageClassification.from_pretrained(model_checkpoint).to(device)
 processor = AutoImageProcessor.from_pretrained(model_checkpoint)
 
+
 pipe = pipeline("image-classification", model=model, image_processor=processor)
 
 def get_results(image, ppln=pipe):
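The CUDA-or-CPU fallback added here is the pattern this commit applies across the image-to-text, summarization, and retrieval scripts below. A self-contained sketch of it; the checkpoint name and image path are assumed examples, not project defaults:

```python
import torch
from transformers import pipeline

# Fall back to CPU automatically when no GPU is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 'google/vit-base-patch16-224' is a hypothetical example checkpoint.
pipe = pipeline("image-classification", model="google/vit-base-patch16-224", device=device)
print(pipe("path/to/image.jpg")[0])  # top label and its score
```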
3 changes: 2 additions & 1 deletion docker/image_to_text.py

@@ -19,7 +19,8 @@
 
 model_checkpoint = mod
 
-pipe = pipeline("image-to-text", model=model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+pipe = pipeline("image-to-text", model=model_checkpoint, device=device)
 
 def get_results(image, ppln=pipe):
     img = Image.fromarray(image)
3 changes: 2 additions & 1 deletion docker/requirements.txt

@@ -11,4 +11,5 @@ diffusers==0.27.2
 pydantic==2.6.4
 qdrant_client==1.9.0
 pillow==10.2.0
-accelerate
+datasets==2.15.0
+accelerate
74 changes: 74 additions & 0 deletions docker/retrieval_image_search.py

@@ -0,0 +1,74 @@
from transformers import AutoImageProcessor, AutoModel
from utils import ImageDB
from PIL import Image
from qdrant_client import QdrantClient
import gradio as gr
from argparse import ArgumentParser
import torch

argparse = ArgumentParser()
argparse.add_argument(
    "-m",
    "--model",
    help="HuggingFace Model identifier, such as 'facebook/dinov2-base'",
    required=True,
)

argparse.add_argument(
    "-id",
    "--image_dimension",
    help="Dimension of the image embeddings (e.g. 512, 768, 384...)",
    required=False,
    default=512,
    type=int
)

argparse.add_argument(
    "-d",
    "--directory",
    help="Directory where all your images of interest are stored",
    required=False,
    default="No directory"
)


args = argparse.parse_args()


mod = args.model
dirs = args.directory
imd = args.image_dimension

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained(mod)
model = AutoModel.from_pretrained(mod).to(device)

client = QdrantClient(host="host.docker.internal", port=6333)
imdb = ImageDB(dirs, processor, model, client, imd)
print(imdb.collection_name)
imdb.create_dataset()
imdb.to_collection()


def see_images(dataset, results):
    images = []
    for i in range(len(results)):
        img = dataset[results[i].id]['image']
        images.append(img)
    return images

def process_img(image):
    global imdb
    results = imdb.searchDB(Image.fromarray(image))
    images = see_images(imdb.dataset, results)
    return images


iface = gr.Interface(
    title="everything-ai-retrievalimg",
    fn=process_img,
    inputs=gr.Image(label="Input Image"),
    outputs=gr.Gallery(label="Matching Images"),
)

iface.launch(server_name="0.0.0.0", share=False)
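As the select_and_run.py diff below shows, this script is launched with `-d`, `-id` and `-m` flags. A hedged launch example with assumed values ('facebook/dinov2-base' and `./my_images` are illustrative; 768 matches that checkpoint's hidden size):

```python
import subprocess as sp

# Mirrors the command select_and_run.py builds for 'retrieval-image-search'.
sp.run(
    "python3 retrieval_image_search.py -d ./my_images -id 768 -m facebook/dinov2-base",
    shell=True,
)
```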
5 changes: 3 additions & 2 deletions docker/retrieval_text_generation.py

@@ -4,7 +4,7 @@
 from sentence_transformers import SentenceTransformer
 from argparse import ArgumentParser
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import sys
+import torch
 import os
 
 argparse = ArgumentParser()
@@ -61,7 +61,8 @@
 pdfdb.collect_data()
 pdfdb.qdrant_collection_and_upload()
 
-model = AutoModelForCausalLM.from_pretrained(mod)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained(mod).to(device)
 tokenizer = AutoTokenizer.from_pretrained(mod)
 
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=2048, repetition_penalty=1.2, temperature=0.4)
19 changes: 14 additions & 5 deletions docker/select_and_run.py

@@ -1,19 +1,22 @@
 import subprocess as sp
 import gradio as gr
 
-TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py"}
+TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py"}
 
 
-def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
-    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations":
+def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="512"):
+    if tsk != "retrieval-text-generation" and tsk != "image-generation-pollinations" and tsk != "retrieval-image-search":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod}"
     elif tsk == "retrieval-text-generation":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -m {mod} -pf '{pdff}' -d '{dirs}' -l '{lan}'"
-    else:
+    elif tsk == "image-generation-pollinations":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]}"
+    else:
+        sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -d {dirs} -id {imdim} -m {mod}", shell=True)
+        return f"python3 {TASK_TO_SCRIPT[tsk]} -d {dirs} -id {imdim} -m {mod}"
 
 demo = gr.Interface(
     build_command,
@@ -38,7 +41,7 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
         ),
         gr.Textbox(
             label="Directory",
-            info="Directory where all your pdfs of interest are stored (only available with 'retrieval-text-generation')",
+            info="Directory where all your pdfs or images (.jpg, .jpeg, .png) of interest are stored (only available with 'retrieval-text-generation' for pdfs and 'retrieval-image-search' for images)",
             lines=3,
             value="None",
         ),
@@ -48,6 +51,12 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None"):
         ),
+        gr.Textbox(
+            label="Image dimension",
+            info="Dimension of the image embeddings (this is generally model and/or task-dependent!)",
+            lines=3,
+            value="e.g.: 512, 384, 768...",
+        ),
     ],
     outputs="textbox",
     theme=gr.themes.Base()
4 changes: 3 additions & 1 deletion docker/text_summarization.py

@@ -5,6 +5,7 @@
 from utils import merge_pdfs
 import gradio as gr
 import time
+import torch
 
 histr = [[None, "Hi, I'm **everything-ai-summarization**🤖.\nI'm here to assist you and let you summarize _your_ texts and _your_ pdfs!\nCheck [my website](https://astrabert.github.io/everything-ai/) for troubleshooting and documentation reference\nHave fun!😊"]]
 
@@ -24,7 +25,8 @@
 
 model_checkpoint = mod
 
-summarizer = pipeline("summarization", model=model_checkpoint)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+summarizer = pipeline("summarization", model=model_checkpoint, device=device)
 
 def convert_none_to_str(l: list):
     newlist = []
72 changes: 72 additions & 0 deletions docker/utils.py

@@ -6,6 +6,10 @@
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 import os
+from datasets import load_dataset, Dataset
+import torch
+import numpy as np
 
 
 def remove_items(test_list, item):
     res = [i for i in test_list if i != item]
@@ -92,3 +96,71 @@ def translatef(self):
        translation = translator.translate(self.text)
        return translation

class ImageDB:
    def __init__(self, imagesdir, processor, model, client, dimension):
        self.imagesdir = imagesdir
        self.processor = processor
        self.model = model
        self.client = client
        self.dimension = dimension
        if os.path.basename(self.imagesdir) != "":
            self.collection_name = os.path.basename(self.imagesdir)+"_ImagesCollection"
        else:
            if "\\" in self.imagesdir:
                self.collection_name = self.imagesdir.split("\\")[-2]+"_ImagesCollection"
            else:
                self.collection_name = self.imagesdir.split("/")[-2]+"_ImagesCollection"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(size=self.dimension, distance=models.Distance.COSINE)
        )
    def get_embeddings(self, batch):
        inputs = self.processor(images=batch['image'], return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
        batch['embeddings'] = outputs
        return batch
    def create_dataset(self):
        self.dataset = load_dataset("imagefolder", data_dir=self.imagesdir, split="train")
        self.dataset = self.dataset.map(self.get_embeddings, batched=True, batch_size=16)
    def to_collection(self):
        np.save(os.path.join(self.imagesdir, "vectors"), np.array(self.dataset['embeddings']), allow_pickle=False)

        payload = self.dataset.select_columns([
            "label"
        ]).to_pandas().fillna(0).to_dict(orient="records")

        ids = list(range(self.dataset.num_rows))
        embeddings = np.load(os.path.join(self.imagesdir, "vectors.npy")).tolist()

        batch_size = 1000

        for i in range(0, self.dataset.num_rows, batch_size):

            low_idx = min(i+batch_size, self.dataset.num_rows)

            batch_of_ids = ids[i: low_idx]
            batch_of_embs = embeddings[i: low_idx]
            batch_of_payloads = payload[i: low_idx]

            self.client.upsert(
                collection_name=self.collection_name,
                points=models.Batch(
                    ids=batch_of_ids,
                    vectors=batch_of_embs,
                    payloads=batch_of_payloads
                )
            )
    def searchDB(self, image):
        dtst = {"image": [image], "label": ["None"]}
        dtst = Dataset.from_dict(dtst)
        dtst = dtst.map(self.get_embeddings, batched=True, batch_size=1)
        img = dtst[0]
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=img['embeddings'],
            limit=4
        )
        return results

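`ImageDB.get_embeddings` mean-pools the encoder's last hidden state into one vector per image, and `searchDB` runs a cosine search over the collection. A standalone sketch of that round trip under assumed names: 'facebook/dinov2-base' as the checkpoint, Qdrant reached at `localhost` rather than `host.docker.internal`, and a collection built from a hypothetical `./my_images` folder:

```python
import torch
from PIL import Image
from qdrant_client import QdrantClient
from transformers import AutoImageProcessor, AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = AutoModel.from_pretrained("facebook/dinov2-base").to(device)

# Embed one query image the way ImageDB.get_embeddings does:
# mean-pool the last hidden state into a single vector.
image = Image.open("query.jpg")  # hypothetical query image
inputs = processor(images=image, return_tensors="pt").to(device)
with torch.no_grad():
    vector = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()[0]

# Cosine search against the collection ImageDB created; the vector size
# (768 for this checkpoint) must match the -id value used at indexing time.
client = QdrantClient(host="localhost", port=6333)
hits = client.search(
    collection_name="my_images_ImagesCollection",
    query_vector=vector.tolist(),
    limit=4,
)
print([(hit.id, hit.score) for hit in hits])
```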
Binary file modified imgs/everything-ai.drawio.png
