Merge pull request #27 from AditiR-42/ms5
Fix Test Coverage
AditiR-42 authored Dec 12, 2024
2 parents 81cbca5 + ae8f58d commit 46e5139
Showing 6 changed files with 384 additions and 28 deletions.
43 changes: 41 additions & 2 deletions requirements.txt
@@ -1,7 +1,6 @@
python-multipart
pytest==8.3.3
pytest-cov==6.0.0
PyMuPDF==1.21.1
pandas==1.5.3
torch==2.2.2
google-cloud-aiplatform>=1.27.0
numpy<2.0
@@ -10,3 +9,43 @@ transformers==4.34.0
gcsfs==2023.10.0
httpx==0.24.0
reportlab==4.0.4

# Essential Libraries
fastapi==0.78.0
uvicorn==0.13.4
pandas==2.2.3
tensorflow==2.16.1
nltk
fuzzywuzzy

# Google Cloud SDK and related dependencies
google-api-core==2.23.0
google-auth==2.36.0
# google-cloud-aiplatform[all]==1.71.1
google-cloud-bigquery==3.27.0
google-cloud-core==2.4.1
google-cloud-resource-manager==1.13.1
google-cloud-storage==2.18.2
google-crc32c==1.6.0
google-resumable-media==2.7.2
googleapis-common-protos==1.66.0
grpc-google-iam-v1==0.13.1
grpcio==1.68.0
grpcio-status
proto-plus==1.25.0
protobuf==4.23.4

# HTTP and Networking
requests==2.32.3
urllib3==1.26.18
certifi==2024.8.30
idna==3.10

# PyMuPDF for PDF processing
PyMuPDF==1.24.13

# Miscellaneous utilities
shapely==2.0.6

# Optional Libraries for Development and Testing
nose==1.3.7
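
With this many new pins, it is worth confirming that the installed environment actually matches the updated requirements before running the coverage suite. The snippet below is only an illustrative sanity check, not part of this commit; the package names and versions mirror a few of the pins above.

from importlib.metadata import version

# A few of the pins introduced above; extend the dict as needed.
EXPECTED = {
    "pandas": "2.2.3",
    "PyMuPDF": "1.24.13",
    "protobuf": "4.23.4",
}

for package, pinned in EXPECTED.items():
    installed = version(package)
    status = "ok" if installed == pinned else "MISMATCH"
    print(f"{package}: pinned {pinned}, installed {installed} ({status})")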
47 changes: 26 additions & 21 deletions src/api_service/api/routers/recommend.py
@@ -1,5 +1,4 @@
from fastapi import FastAPI, HTTPException, APIRouter, Depends
from fastapi import HTTPException
from pydantic import BaseModel
from google.cloud import storage, aiplatform
from vertexai.generative_models import GenerativeModel
@@ -11,17 +10,24 @@
import nltk
import os
import traceback
nltk.download('wordnet')

# Dynamically set GOOGLE_APPLICATION_CREDENTIALS to the secrets folder
# current_dir = os.path.dirname(os.path.abspath(__file__))
# secrets_path = os.path.abspath(os.path.join(current_dir, "secrets/model-containerization.json"))
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = secrets_path
nltk.download("wordnet")

# Initialize FastAPI and Router
router = APIRouter()

# function to load dataset
# Initialize dataset as a global placeholder
df = None
formatted_data = None

# Initialize Vertex AI
aiplatform.init(project="ac215-privasee", location="us-central1")

# Initialize the model
model = GenerativeModel("gemini-1.5-flash-002")


# Function to load dataset
def load_dataset():
    try:
        client = storage.Client()
@@ -35,23 +41,25 @@ def load_dataset():
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to load dataset: {str(e)}")

#load the dataset as a universal variable
df = load_dataset()
formatted_data = df["formatted"].tolist()

# Initialize Vertex AI
aiplatform.init(project="ac215-privasee", location="us-central1")
# Function to initialize dataset lazily
def initialize_dataset():
    global df, formatted_data
    if df is None:
        try:
            df = load_dataset()
            formatted_data = df["formatted"].tolist()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error initializing dataset: {str(e)}")

#initialize the model
model = GenerativeModel("gemini-1.5-flash-002")

# Request Model
class QueryRequest(BaseModel):
    query: str


# Helper Functions
def extract_service_and_requirements(query: str):
    # Define the prompt to parse all fields from the query
    prompt = (
        f"Analyze this query: '{query}'. Extract values for the following fields if mentioned, otherwise return 'NA':\n"
        f"- Service: Name of the service (e.g., Facebook, Instagram)\n"
@@ -68,15 +76,12 @@ def extract_service_and_requirements(query: str):
        f"Respond in valid JSON format with the fields as keys. Use 'NA' for any field not mentioned in the query."
    )

    # Generate the response using the model
    response = model.generate_content(prompt)

    try:
        # Clean and parse the response
        clean_response = response.text.strip().strip("```").strip("json").strip()
        parsed_data = json.loads(clean_response)

        # Ensure all fields are returned, even if NA
        expected_fields = [
            "Service",
            "privacy_rating",
@@ -90,9 +95,7 @@
            "Free",
            "Contains Ads",
        ]
        result = {field: parsed_data.get(field, "NA") for field in expected_fields}

        return result
        return {field: parsed_data.get(field, "NA") for field in expected_fields}

    except json.JSONDecodeError as e:
        raise ValueError(f"Error decoding JSON response: {e}")
@@ -107,6 +110,8 @@ def filter_dataframe(criteria):
    Returns:
        pd.DataFrame: A filtered DataFrame based on the criteria.
    """
    initialize_dataset()  # Ensure the dataset is loaded
    filtered_df = df.copy()
    # Define a mapping for privacy ratings
    privacy_rating_order = {
        "A": 4,
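
The point of the new lazy initialization is that importing the router no longer reaches Cloud Storage, so unit tests can substitute a small in-memory DataFrame for the GCS-backed loader. Below is a minimal test sketch along those lines, assuming pytest and an environment where the module-level Vertex AI calls succeed; the test name and stubbing approach are illustrative, not part of this commit.

import pandas as pd

from api_service.api.routers import recommend


def test_initialize_dataset_uses_stub(monkeypatch):
    # Stand in for the GCS-backed loader with a tiny in-memory frame.
    fake_df = pd.DataFrame({"formatted": ["Service A | privacy rating A"]})
    monkeypatch.setattr(recommend, "load_dataset", lambda: fake_df)
    monkeypatch.setattr(recommend, "df", None)  # reset the cached dataset

    recommend.initialize_dataset()

    assert recommend.formatted_data == fake_df["formatted"].tolist()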
4 changes: 2 additions & 2 deletions src/api_service/api/routers/summarize.py
@@ -1,8 +1,8 @@
import os
from fastapi import APIRouter, UploadFile, HTTPException, Form, File
import logging
from api.utils.process_pdf import process_pdf_privacy_issues
from api.utils.privacy_grader import PrivacyGrader
from api_service.api.utils.process_pdf import process_pdf_privacy_issues
from api_service.api.utils.privacy_grader import PrivacyGrader
import traceback

# Initialize the FastAPI router
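
This import change, together with the matching one in service.py below, switches the import root from api.… to the absolute api_service.api.… path, so the test runner needs the repository's src directory on sys.path. One way to arrange that is a top-level conftest.py along these lines; this is a sketch under that assumption, and the project may instead rely on pytest's pythonpath setting or an editable install.

import os
import sys

# Make api_service.… imports resolve when pytest runs from the repository root.
SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "src")
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)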
3 changes: 2 additions & 1 deletion src/api_service/api/service.py
@@ -1,7 +1,7 @@
import os
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware
from api.routers import summarize, recommend
from api_service.api.routers import summarize, recommend

# Dynamically set GOOGLE_APPLICATION_CREDENTIALS to the secrets folder
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -41,3 +41,4 @@ async def get_api_status():
app.include_router(summarize.router)



1 change: 1 addition & 0 deletions src/models/Pipfile
@@ -18,6 +18,7 @@ google-cloud-aiplatform = "*"
pymupdf = "*"
pdfplumber = "*"
fitz = "*"
fastapi = "*"

[dev-packages]
pytest = "*"
