Skip to content

Commit

Permalink
add logging and dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
florian committed Jun 15, 2024
1 parent 3ec5cc2 commit 7e6f683
Show file tree
Hide file tree
Showing 22 changed files with 357 additions and 143 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,5 +166,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

/data
.env
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# syntax=docker/dockerfile:1

FROM python:3.9-slim-buster
FROM python:3.10-slim-buster

ENV POETRY_VERSION=1.4 \
ENV POETRY_VERSION=1.6 \
POETRY_VIRTUALENVS_CREATE=false

# Install poetry
Expand All @@ -18,4 +18,6 @@ RUN poetry install --no-interaction --no-ansi --no-root --no-dev
# Copy Python code to the Docker image
COPY pypi_llm /code/pypi_llm/

ENV PYTHONPATH=/code

CMD [ "python", "pypi_llm/foo.py"]
55 changes: 10 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,55 +1,20 @@
# pypi-llm

[![Release](https://img.shields.io/github/v/release/fpgmaas/pypi-llm)](https://img.shields.io/github/v/release/fpgmaas/pypi-llm)
[![Build status](https://img.shields.io/github/actions/workflow/status/fpgmaas/pypi-llm/main.yml?branch=main)](https://github.com/fpgmaas/pypi-llm/actions/workflows/main.yml?query=branch%3Amain)
[![codecov](https://codecov.io/gh/fpgmaas/pypi-llm/branch/main/graph/badge.svg)](https://codecov.io/gh/fpgmaas/pypi-llm)
[![Commit activity](https://img.shields.io/github/commit-activity/m/fpgmaas/pypi-llm)](https://img.shields.io/github/commit-activity/m/fpgmaas/pypi-llm)
[![License](https://img.shields.io/github/license/fpgmaas/pypi-llm)](https://img.shields.io/github/license/fpgmaas/pypi-llm)
https://drive.google.com/file/d/1huR7-VD3AieBRCcQyRX9MWbPLMb_czjq/view?usp=sharing

This is a template repository for Python projects that use Poetry for their dependency management.
# setup

- **GitHub repository**: <https://github.com/fpgmaas/pypi-llm/>
- **Documentation**: <https://fpgmaas.github.io/pypi-llm/>

## Getting started with your project

First, create a repository on GitHub with the same name as this project, and then run the following commands:

```bash
git init -b main
git add .
git commit -m "init commit"
git remote add origin git@github.com:fpgmaas/pypi-llm.git
git push -u origin main
```

Finally, install the environment and the pre-commit hooks with

```bash
make install
docker build -t pypi-llm .
```

You are now ready to start development on your project!
The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.

To finalize the set-up for publishing to PyPI or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).

## Releasing a new version

- Create an API Token on [PyPI](https://pypi.org/).
- Add the API Token to your project's secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/fpgmaas/pypi-llm/settings/secrets/actions/new).
- Create a [new release](https://github.com/fpgmaas/pypi-llm/releases/new) on GitHub.
- Create a new tag in the form `*.*.*`.

For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release).

---

Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).

---
```
docker run --rm \
--env-file .env \
-v $(pwd)/data:/code/data \
pypi-llm \
python /code/pypi_llm/scripts/1_download_dataset.py
```

## total

Expand Down
4 changes: 4 additions & 0 deletions data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
30 changes: 30 additions & 0 deletions frontend/app/components/InfoBox.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import React from "react";

/** Props accepted by the InfoBox component. */
interface InfoBoxProps {
// When false, InfoBox returns null and nothing is rendered.
infoBoxVisible: boolean;
}

/**
 * Help panel that explains how the natural-language package search works.
 *
 * Renders nothing while `infoBoxVisible` is false, so the parent can mount it
 * unconditionally and toggle it through the prop.
 */
const InfoBox: React.FC<InfoBoxProps> = ({ infoBoxVisible }) => {
  if (!infoBoxVisible) return null;

  return (
    <div className="w-3/5 bg-white p-6 rounded-lg shadow-lg mt-4">
      <h2 className="text-2xl font-bold mb-2">How does this work?</h2>
      <p className="text-gray-700">
        This application allows you to search for Python packages on PyPi using
        natural language. An example query would be &quot;a package that creates
        plots and beautiful visualizations&quot;.
      </p>
      <br />
      <p className="text-gray-700">
        Once you click search, your query will be matched against the summary
        and the first part of the description of all PyPi packages with more
        than 50 weekly downloads. The results are then scored based on their
        similarity and their number of weekly downloads, and the thirty best
        results are displayed in the table below.
      </p>
    </div>
  );
};

export default InfoBox;
File renamed without changes.
87 changes: 29 additions & 58 deletions frontend/app/page.tsx
Original file line number Diff line number Diff line change
@@ -1,53 +1,28 @@
"use client";

import { useState } from "react";
import axios from "axios";
import SearchResultsTable from "../components/SearchResultsTable";
import { handleSearch, sortResults } from "./utils/search";
import SearchResultsTable from "./components/SearchResultsTable";
import InfoBox from "./components/InfoBox";
import { ClipLoader } from "react-spinners";

export default function Home() {
const [text, setText] = useState("");
const [results, setResults] = useState([]);
const [sortField, setSortField] = useState("weekly_downloads");
const [sortDirection, setSortDirection] = useState("desc");
const [loading, setLoading] = useState(false);
const [error, setError] = useState("");
const [infoBoxVisible, setInfoBoxVisible] = useState(false);

const handleSearch = async () => {
setLoading(true);
setError("");
try {
const response = await axios.post(
"http://localhost:8000/search",
{
query: text,
},
{
headers: {
"Content-Type": "application/json",
},
},
);
const fetchedResults = response.data.matches;
setResults(sortResults(fetchedResults, sortField, sortDirection));
} catch (error) {
setError("Error fetching search results.");
console.error("Error fetching search results:", error);
} finally {
setLoading(false);
}
};
interface Match {
name: string;
similarity: number;
weekly_downloads: number;
summary: string;
}

const sortResults = (data, field, direction) => {
return [...data].sort((a, b) => {
if (a[field] < b[field]) return direction === "asc" ? -1 : 1;
if (a[field] > b[field]) return direction === "asc" ? 1 : -1;
return 0;
});
};
export default function Home() {
const [text, setText] = useState<string>("");
const [results, setResults] = useState<Match[]>([]);
const [sortField, setSortField] = useState<string>("similarity");
const [sortDirection, setSortDirection] = useState<string>("desc");
const [loading, setLoading] = useState<boolean>(false);
const [error, setError] = useState<string>("");
const [infoBoxVisible, setInfoBoxVisible] = useState<boolean>(false);

const handleSort = (field) => {
const handleSort = (field: string) => {
const direction =
sortField === field && sortDirection === "asc" ? "desc" : "asc";
setSortField(field);
Expand All @@ -72,7 +47,16 @@ export default function Home() {
></textarea>
<button
className="w-[250px] p-2 border rounded bg-blue-500 text-white hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500"
onClick={handleSearch}
onClick={() =>
handleSearch(
text,
sortField,
sortDirection,
setResults,
setLoading,
setError,
)
}
>
Search
</button>
Expand All @@ -91,20 +75,7 @@ export default function Home() {
</button>
</div>

{infoBoxVisible && (
<div className="w-3/5 bg-white p-6 rounded-lg shadow-lg mt-4">
<h2 className="text-2xl font-bold mb-2">How does this work?</h2>
<p className="text-gray-700">
This application allows you to search for Python packages on PyPi
using natural language. So an example query would be "a package that
creates plots and beautiful visualizations". Once you click search,
your query will be matched against the summary and the first part of
the description of all PyPi packages with more than 50 weekly
downloads, and the 50 most similar results will be displayed in a
table below.
</p>
</div>
)}
<InfoBox infoBoxVisible={infoBoxVisible} />

{results.length > 0 && (
<div className="w-full flex justify-center mt-6">
Expand Down
52 changes: 52 additions & 0 deletions frontend/app/utils/search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import axios from "axios";

/** One search hit, as found in the `matches` array of the search response. */
interface Match {
// Package name — presumably the PyPI project name; confirm against the API.
name: string;
// Similarity between the query and the package (scale not visible here).
similarity: number;
// Weekly download count reported for the package.
weekly_downloads: number;
// Short package summary text.
summary: string;
}

// The API registers its route as "/search/" (with the trailing slash); posting
// to "/search" only works through a 307 redirect, so address the route directly.
const SEARCH_ENDPOINT = "http://localhost:8000/search/";

/**
 * Send the query to the search API and publish the outcome through the
 * supplied React state setters.
 *
 * @param query - Free-text search query entered by the user.
 * @param sortField - `Match` property used to order the fetched results.
 * @param sortDirection - `"asc"` or `"desc"`.
 * @param setResults - Receives the sorted matches on success.
 * @param setLoading - Set to true while the request is in flight, false after.
 * @param setError - Cleared before the request; set to a message on failure.
 */
export const handleSearch = async (
  query: string,
  sortField: string,
  sortDirection: string,
  setResults: React.Dispatch<React.SetStateAction<Match[]>>,
  setLoading: React.Dispatch<React.SetStateAction<boolean>>,
  setError: React.Dispatch<React.SetStateAction<string>>,
) => {
  setLoading(true);
  setError("");
  try {
    const response = await axios.post(
      SEARCH_ENDPOINT,
      { query },
      { headers: { "Content-Type": "application/json" } },
    );
    const fetchedResults: Match[] = response.data.matches;
    setResults(sortResults(fetchedResults, sortField, sortDirection));
  } catch (err) {
    // Surface a generic message to the user; keep the details in the console.
    setError("Error fetching search results.");
    console.error("Error fetching search results:", err);
  } finally {
    // Always drop the loading state, whether the request succeeded or failed.
    setLoading(false);
  }
};

/**
 * Return a sorted copy of `data`, ordered by the given `field`.
 *
 * @param data - Matches to sort; the input array is not mutated.
 * @param field - Name of the `Match` property to compare on.
 * @param direction - `"asc"` sorts ascending; any other value sorts descending.
 * @returns A new array holding the same elements in sorted order.
 */
export const sortResults = (
  data: Match[],
  field: string,
  direction: string,
): Match[] => {
  // `field` arrives as a plain string from component state. Narrow it so that
  // indexing a `Match` type-checks under `strict` / `noImplicitAny`.
  const key = field as keyof Match;
  const sign = direction === "asc" ? 1 : -1;

  return [...data].sort((a, b) => {
    if (a[key] < b[key]) return -sign;
    if (a[key] > b[key]) return sign;
    return 0;
  });
};
35 changes: 34 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion pypi_llm/api/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import polars as pl
from dotenv import load_dotenv
from fastapi import FastAPI
Expand All @@ -6,9 +8,12 @@
from sentence_transformers import SentenceTransformer

from pypi_llm.config import Config
from pypi_llm.utils.logging import setup_logging
from pypi_llm.utils.score_calculator import calculate_score
from pypi_llm.vector_database import VectorDatabaseInterface

setup_logging()

app = FastAPI()

load_dotenv()
Expand Down Expand Up @@ -55,12 +60,20 @@ class SearchResponse(BaseModel):

@app.post("/search/", response_model=SearchResponse)
async def search(query: QueryModel) -> SearchResponse:
    """
    Search for the packages whose summary and description have the highest similarity to the query.

    We take the top_k * 2 most similar packages, and then calculate a weighted score based on the
    similarity and weekly downloads. The top_k packages with the highest score are returned.
    """
    logging.info("Searching for similar projects. Query: '%s'", query.query)
    # Fetch twice as many candidates as requested so that re-ranking by the
    # weighted score can promote packages the similarity search ranked lower.
    df_matches = vector_database_interface.find_similar(query.query, top_k=query.top_k * 2)
    df_matches = df_matches.join(df, how="left", on="name")

    logging.info("Found similar projects. Calculating the weighted scores and filtering...")
    df_matches = calculate_score(df_matches)
    df_matches = df_matches.sort("score", descending=True)
    df_matches = df_matches.head(query.top_k)

    logging.info("Returning the results...")
    return SearchResponse(matches=df_matches.to_dicts())
Loading

0 comments on commit 7e6f683

Please sign in to comment.