Merge pull request #27 from AditiR-42/ms5
Fix Test Coverage
AditiR-42 authored Dec 12, 2024
2 parents 81cbca5 + ae8f58d commit 46e5139
Showing 6 changed files with 384 additions and 28 deletions.
43 changes: 41 additions & 2 deletions requirements.txt
@@ -1,7 +1,6 @@
python-multipart
pytest==8.3.3
pytest-cov==6.0.0
PyMuPDF==1.21.1
pandas==1.5.3
torch==2.2.2
google-cloud-aiplatform>=1.27.0
numpy<2.0
@@ -10,3 +9,43 @@ transformers==4.34.0
gcsfs==2023.10.0
httpx==0.24.0
reportlab==4.0.4

# Essential Libraries
fastapi==0.78.0
uvicorn==0.13.4
pandas==2.2.3
tensorflow==2.16.1
nltk
fuzzywuzzy

# Google Cloud SDK and related dependencies
google-api-core==2.23.0
google-auth==2.36.0
# google-cloud-aiplatform[all]==1.71.1
google-cloud-bigquery==3.27.0
google-cloud-core==2.4.1
google-cloud-resource-manager==1.13.1
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-resumable-media==2.7.2
googleapis-common-protos==1.66.0
grpc-google-iam-v1==0.13.1
grpcio==1.68.0
grpcio-status
proto-plus==1.25.0
protobuf==4.23.4

# HTTP and Networking
requests==2.32.3
urllib3==1.26.18
certifi==2024.8.30
idna==3.10

# PyMuPDF for PDF processing
PyMuPDF==1.24.13

# Miscellaneous utilities
shapely==2.0.6

# Optional Libraries for Development and Testing
nose==1.3.7
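
With this many new pins, it is worth confirming that the installed environment actually matches the updated requirements before running the coverage suite. The snippet below is only an illustrative sanity check, not part of this commit; the package names and versions mirror a few of the pins above.

from importlib.metadata import version

# A few of the pins introduced above; extend the dict as needed.
EXPECTED = {
    "pandas": "2.2.3",
    "PyMuPDF": "1.24.13",
    "protobuf": "4.23.4",
}

for package, pinned in EXPECTED.items():
    installed = version(package)
    status = "ok" if installed == pinned else "MISMATCH"
    print(f"{package}: pinned {pinned}, installed {installed} ({status})")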
47 changes: 26 additions & 21 deletions src/api_service/api/routers/recommend.py
@@ -1,5 +1,4 @@
from fastapi import FastAPI, HTTPException, APIRouter, Depends
from fastapi import HTTPException
from pydantic import BaseModel
from google.cloud import storage, aiplatform
from vertexai.generative_models import GenerativeModel
@@ -11,17 +10,24 @@
import nltk
import os
import traceback
nltk.download('wordnet')

# Dynamically set GOOGLE_APPLICATION_CREDENTIALS to the secrets folder
# current_dir = os.path.dirname(os.path.abspath(__file__))
# secrets_path = os.path.abspath(os.path.join(current_dir, "secrets/model-containerization.json"))
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = secrets_path
nltk.download("wordnet")

# Initialize FastAPI and Router
router = APIRouter()

# function to load dataset
# Initialize dataset as a global placeholder
df = None
formatted_data = None

# Initialize Vertex AI
aiplatform.init(project="ac215-privasee", location="us-central1")

# Initialize the model
model = GenerativeModel("gemini-1.5-flash-002")


# Function to load dataset
def load_dataset():
    try:
        client = storage.Client()
@@ -35,23 +41,25 @@ def load_dataset():
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load dataset: {str(e)}")

#load the dataset as a universal variable
df = load_dataset()
formatted_data = df["formatted"].tolist()

# Initialize Vertex AI
aiplatform.init(project="ac215-privasee", location="us-central1")
# Function to initialize dataset lazily
def initialize_dataset():
    global df, formatted_data
    if df is None:
        try:
            df = load_dataset()
            formatted_data = df["formatted"].tolist()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error initializing dataset: {str(e)}")

#initialize the model
model = GenerativeModel("gemini-1.5-flash-002")

# Request Model
class QueryRequest(BaseModel):
    query: str


# Helper Functions
def extract_service_and_requirements(query: str):
    # Define the prompt to parse all fields from the query
    prompt = (
        f"Analyze this query: '{query}'. Extract values for the following fields if mentioned, otherwise return 'NA':\n"
        f"- Service: Name of the service (e.g., Facebook, Instagram)\n"
@@ -68,15 +76,12 @@ def extract_service_and_requirements(query: str):
        f"Respond in valid JSON format with the fields as keys. Use 'NA' for any field not mentioned in the query."
    )

    # Generate the response using the model
    response = model.generate_content(prompt)

    try:
        # Clean and parse the response
        clean_response = response.text.strip().strip("```").strip("json").strip()
        parsed_data = json.loads(clean_response)

        # Ensure all fields are returned, even if NA
        expected_fields = [
            "Service",
            "privacy_rating",
@@ -90,9 +95,7 @@
            "Free",
            "Contains Ads",
        ]
        result = {field: parsed_data.get(field, "NA") for field in expected_fields}

        return result
        return {field: parsed_data.get(field, "NA") for field in expected_fields}

    except json.JSONDecodeError as e:
        raise ValueError(f"Error decoding JSON response: {e}")
@@ -107,6 +110,8 @@ def filter_dataframe(criteria):
    Returns:
        pd.DataFrame: A filtered DataFrame based on the criteria.
    """
    initialize_dataset()  # Ensure the dataset is loaded
    filtered_df = df.copy()
    # Define a mapping for privacy ratings
    privacy_rating_order = {
        "A": 4,
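
The point of the new lazy initialization is that importing the router no longer reaches Cloud Storage, so unit tests can substitute a small in-memory DataFrame for the GCS-backed loader. Below is a minimal test sketch along those lines, assuming pytest and an environment where the module-level Vertex AI calls succeed; the test name and stubbing approach are illustrative, not part of this commit.

import pandas as pd

from api_service.api.routers import recommend


def test_initialize_dataset_uses_stub(monkeypatch):
    # Stand in for the GCS-backed loader with a tiny in-memory frame.
    fake_df = pd.DataFrame({"formatted": ["Service A | privacy rating A"]})
    monkeypatch.setattr(recommend, "load_dataset", lambda: fake_df)
    monkeypatch.setattr(recommend, "df", None)  # reset the cached dataset

    recommend.initialize_dataset()

    assert recommend.formatted_data == fake_df["formatted"].tolist()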
4 changes: 2 additions & 2 deletions src/api_service/api/routers/summarize.py
@@ -1,8 +1,8 @@
import os
from fastapi import APIRouter, UploadFile, HTTPException, Form, File
import logging
from api.utils.process_pdf import process_pdf_privacy_issues
from api.utils.privacy_grader import PrivacyGrader
from api_service.api.utils.process_pdf import process_pdf_privacy_issues
from api_service.api.utils.privacy_grader import PrivacyGrader
import traceback

# Initialize the FastAPI router
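
This import change, together with the matching one in service.py below, switches the import root from api.… to the absolute api_service.api.… path, so the test runner needs the repository's src directory on sys.path. One way to arrange that is a top-level conftest.py along these lines; this is a sketch under that assumption, and the project may instead rely on pytest's pythonpath setting or an editable install.

import os
import sys

# Make api_service.… imports resolve when pytest runs from the repository root.
SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)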
3 changes: 2 additions & 1 deletion src/api_service/api/service.py
@@ -1,7 +1,7 @@
import os
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware
from api.routers import summarize, recommend
from api_service.api.routers import summarize, recommend

# Dynamically set GOOGLE_APPLICATION_CREDENTIALS to the secrets folder
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -41,3 +41,4 @@ async def get_api_status():
app.include_router(summarize.router)



1 change: 1 addition & 0 deletions src/models/Pipfile
@@ -18,6 +18,7 @@ google-cloud-aiplatform = "*"
pymupdf = "*"
pdfplumber = "*"
fitz = "*"
fastapi = "*"

[dev-packages]
pytest = "*"
