Commit
commenting out bandit
esteininger committed Mar 19, 2024
1 parent b761ca7 commit 1799442
Showing 7 changed files with 182 additions and 1,080 deletions.
36 changes: 18 additions & 18 deletions src/api/utilities/code.py
@@ -13,25 +13,25 @@ def check_code_security(code, max_code_length=3000, language="python"):
status_code=400,
detail=f"Code length exceeds {max_code_length} characters",
)
extensions = {"python": ".py"}
with tempfile.NamedTemporaryFile(suffix=".py") as temp:
temp.write(code.encode())
temp.flush()
result = subprocess.run(
["bandit", temp.name], capture_output=True, text=True
)
# extensions = {"python": ".py"}
# with tempfile.NamedTemporaryFile(suffix=".py") as temp:
# temp.write(code.encode())
# temp.flush()
# result = subprocess.run(
# ["bandit", temp.name], capture_output=True, text=True
# )

# Check if bandit found any issues
if "No issues identified." in result.stdout:
return True
else:
# Parse the issues into a JSON
# Skip the first two lines, which are not issues
issues = result.stdout.split("\n")[2:]
issues_json = [
{"issue": issue} for issue in issues if issue
] # Ignore empty lines
return json.dumps(issues_json[1]["issue"])
# # Check if bandit found any issues
# if "No issues identified." in result.stdout:
# return True
# else:
# # Parse the issues into a JSON
# # Skip the first two lines, which are not issues
# issues = result.stdout.split("\n")[2:]
# issues_json = [
# {"issue": issue} for issue in issues if issue
# ] # Ignore empty lines
# return json.dumps(issues_json[1]["issue"])

@staticmethod
def check_for_function(code):
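With the Bandit invocation commented out, check_code_security now enforces only the length limit and never reports security findings. A minimal sketch of the remaining behavior, assuming the 400 error is FastAPI's HTTPException (inferred from the status_code/detail keyword arguments in the hunk above), not the project's confirmed implementation:

# Sketch of check_code_security after this commit: only the length check survives.
# FastAPI's HTTPException is an assumption based on the status_code/detail kwargs.
from fastapi import HTTPException

def check_code_security(code, max_code_length=3000, language="python"):
    if len(code) > max_code_length:
        raise HTTPException(
            status_code=400,
            detail=f"Code length exceeds {max_code_length} characters",
        )
    # The Bandit subprocess scan is disabled, so callers get no security verdict here.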
60 changes: 30 additions & 30 deletions src/inference/embed/modalities/audio.py
@@ -1,39 +1,39 @@
from transformers import Wav2Vec2Processor, Wav2Vec2Model
import torch
import librosa
import numpy as np
import io
# from transformers import Wav2Vec2Processor, Wav2Vec2Model
# import torch
# import librosa
# import numpy as np
# import io

class AudioEmbeddingService:
def __init__(self, model):
self.processor = Wav2Vec2Processor.from_pretrained(model)
self.model = Wav2Vec2Model.from_pretrained(model)
# class AudioEmbeddingService:
# def __init__(self, model):
# self.processor = Wav2Vec2Processor.from_pretrained(model)
# self.model = Wav2Vec2Model.from_pretrained(model)

def encode(self, file_stream):
# Load the audio file
audio_input, sr = librosa.load(io.BytesIO(file_stream), sr=16000)
# def encode(self, file_stream):
# # Load the audio file
# audio_input, sr = librosa.load(io.BytesIO(file_stream), sr=16000)

# Process audio
inputs = self.processor(audio_input, return_tensors="pt", sampling_rate=sr)
# # Process audio
# inputs = self.processor(audio_input, return_tensors="pt", sampling_rate=sr)

# Move to CPU
inputs = {k: v.to("cpu") for k, v in inputs.items()}
# # Move to CPU
# inputs = {k: v.to("cpu") for k, v in inputs.items()}

# Get the audio embedding
with torch.no_grad():
audio_features = self.model(**inputs).last_hidden_state

# Mean pooling the embeddings across the time dimension
embeddings = audio_features.mean(dim=1)
# # Get the audio embedding
# with torch.no_grad():
# audio_features = self.model(**inputs).last_hidden_state

# Normalize the embeddings
normalized_embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
# # Mean pooling the embeddings across the time dimension
# embeddings = audio_features.mean(dim=1)

return normalized_embeddings
# # Normalize the embeddings
# normalized_embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

def get_dimensions(self):
return self.model.config.hidden_size
# return normalized_embeddings

def get_token_size(self):
# Similar to images, token size isn't directly applicable to audio
return None
# def get_dimensions(self):
# return self.model.config.hidden_size

# def get_token_size(self):
# # Similar to images, token size isn't directly applicable to audio
# return None
56 changes: 28 additions & 28 deletions src/inference/embed/modalities/image.py
@@ -1,38 +1,38 @@
from transformers import CLIPProcessor, CLIPModel
import torch
import io
from PIL import Image
# from transformers import CLIPProcessor, CLIPModel
# import torch
# import io
# from PIL import Image


class ImageEmbeddingService:
def __init__(self, model):
self.processor = CLIPProcessor.from_pretrained(model)
self.model = CLIPModel.from_pretrained(model)
# class ImageEmbeddingService:
# def __init__(self, model):
# self.processor = CLIPProcessor.from_pretrained(model)
# self.model = CLIPModel.from_pretrained(model)

def encode(self, file_stream):
# Load the image
image = Image.open(io.BytesIO(file_stream))
# def encode(self, file_stream):
# # Load the image
# image = Image.open(io.BytesIO(file_stream))

# Process image
inputs = self.processor(images=image, return_tensors="pt")
# # Process image
# inputs = self.processor(images=image, return_tensors="pt")

# Move to CPU
inputs = {k: v.to("cpu") for k, v in inputs.items()}
# # Move to CPU
# inputs = {k: v.to("cpu") for k, v in inputs.items()}

# Get the image embedding
with torch.no_grad():
image_features = self.model.get_image_features(**inputs)
# # Get the image embedding
# with torch.no_grad():
# image_features = self.model.get_image_features(**inputs)

# Normalize the embeddings
image_embeddings = torch.nn.functional.normalize(image_features, p=2, dim=1)
# # Normalize the embeddings
# image_embeddings = torch.nn.functional.normalize(image_features, p=2, dim=1)

return image_embeddings
# return image_embeddings

def get_dimensions(self):
# CLIP's image and text embeddings are of the same size
return self.model.config.text_config.hidden_size
# def get_dimensions(self):
# # CLIP's image and text embeddings are of the same size
# return self.model.config.text_config.hidden_size

def get_token_size(self):
# This method isn't directly applicable to images as it is to text
# Returning None or a default value could be more appropriate
return None
# def get_token_size(self):
# # This method isn't directly applicable to images as it is to text
# # Returning None or a default value could be more appropriate
# return None
114 changes: 57 additions & 57 deletions src/inference/embed/modalities/video.py
@@ -1,57 +1,57 @@
import torch
from PIL import Image
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from transformers import CLIPProcessor, CLIPModel
import cv2
import numpy as np


class VideoEmbeddingService:
def __init__(self, model):
self.processor = CLIPProcessor.from_pretrained(model)
self.model = CLIPModel.from_pretrained(model)

def frame_embeddings(self, video_path):
# Initialize a video capture object
cap = cv2.VideoCapture(video_path)
frame_embeddings = []

while True:
ret, frame = cap.read()
if not ret:
break

# Convert the color space from BGR to RGB, then convert to PIL Image
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(frame)

# Preprocess the image
inputs = self.processor(images=pil_image, return_tensors="pt", padding=True)

# Move to CPU and get the image embedding
inputs = {k: v.to("cpu") for k, v in inputs.items()}
with torch.no_grad():
frame_features = self.model.get_image_features(**inputs)

frame_embeddings.append(frame_features)

cap.release()
return torch.stack(frame_embeddings)

def encode(self, file_stream):
# Assume file_stream is a path for simplicity; adapt as necessary for actual streams
embeddings = self.frame_embeddings(file_stream)
# Aggregate embeddings, e.g., by averaging
video_embedding = embeddings.mean(dim=0)
# Normalize the embeddings
normalized_embedding = torch.nn.functional.normalize(
video_embedding, p=2, dim=1
)
return normalized_embedding

def get_dimensions(self):
return self.model.config.visual_projection.out_features

def get_token_size(self):
# Not applicable for videos, similar to audio and images
return None
# import torch
# from PIL import Image
# from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
# from transformers import CLIPProcessor, CLIPModel
# import cv2
# import numpy as np


# class VideoEmbeddingService:
# def __init__(self, model):
# self.processor = CLIPProcessor.from_pretrained(model)
# self.model = CLIPModel.from_pretrained(model)

# def frame_embeddings(self, video_path):
# # Initialize a video capture object
# cap = cv2.VideoCapture(video_path)
# frame_embeddings = []

# while True:
# ret, frame = cap.read()
# if not ret:
# break

# # Convert the color space from BGR to RGB, then convert to PIL Image
# frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# pil_image = Image.fromarray(frame)

# # Preprocess the image
# inputs = self.processor(images=pil_image, return_tensors="pt", padding=True)

# # Move to CPU and get the image embedding
# inputs = {k: v.to("cpu") for k, v in inputs.items()}
# with torch.no_grad():
# frame_features = self.model.get_image_features(**inputs)

# frame_embeddings.append(frame_features)

# cap.release()
# return torch.stack(frame_embeddings)

# def encode(self, file_stream):
# # Assume file_stream is a path for simplicity; adapt as necessary for actual streams
# embeddings = self.frame_embeddings(file_stream)
# # Aggregate embeddings, e.g., by averaging
# video_embedding = embeddings.mean(dim=0)
# # Normalize the embeddings
# normalized_embedding = torch.nn.functional.normalize(
# video_embedding, p=2, dim=1
# )
# return normalized_embedding

# def get_dimensions(self):
# return self.model.config.visual_projection.out_features

# def get_token_size(self):
# # Not applicable for videos, similar to audio and images
# return None
25 changes: 13 additions & 12 deletions src/inference/embed/service.py
@@ -5,26 +5,27 @@

from _utils import create_success_response

from .modalities.image import ImageEmbeddingService
from .modalities.text import TextEmbeddingService
from .modalities.audio import AudioEmbeddingService
from .modalities.video import VideoEmbeddingService

# from .modalities.image import ImageEmbeddingService
# from .modalities.audio import AudioEmbeddingService
# from .modalities.video import VideoEmbeddingService


class EmbeddingHandler:
def __init__(self, modality, model):
if modality == "text":
# sentence-transformers/all-MiniLM-L6-v2
self.service = TextEmbeddingService(model)
elif modality == "image":
# openai/clip-vit-base-patch32
self.service = ImageEmbeddingService(model)
elif modality == "audio":
# facebook/wav2vec2-base-960h
self.service = AudioEmbeddingService(model)
elif modality == "video":
# openai/clip-vit-base-patch32
self.service = VideoEmbeddingService(model)
# elif modality == "image":
# # openai/clip-vit-base-patch32
# self.service = ImageEmbeddingService(model)
# elif modality == "audio":
# # facebook/wav2vec2-base-960h
# self.service = AudioEmbeddingService(model)
# elif modality == "video":
# # openai/clip-vit-base-patch32
# self.service = VideoEmbeddingService(model)
else:
raise ValueError(f"Unknown modality: {modality}")

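With the image, audio, and video branches commented out, EmbeddingHandler only wires up the text path, and every other modality falls through to the ValueError. A minimal usage sketch under that assumption; the import path is illustrative, and the model strings come from the comments in the hunk above:

# Hypothetical usage after this commit; only the "text" modality is still supported.
from inference.embed.service import EmbeddingHandler  # import path is an assumption

handler = EmbeddingHandler("text", "sentence-transformers/all-MiniLM-L6-v2")

try:
    EmbeddingHandler("image", "openai/clip-vit-base-patch32")
except ValueError as err:
    print(err)  # -> Unknown modality: image (the image branch is commented out)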
