Merge pull request #14 from jaypyles/13-create-a-statistics-page
feat: add statistics page
jaypyles authored Jul 21, 2024
2 parents 242056b + ccbced2 commit b4e059a
Showing 12 changed files with 340 additions and 8 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -28,6 +28,10 @@ From the table, users can download an excel sheet of the job's results, along wi

![logs](https://github.com/jaypyles/www-scrape/blob/master/docs/log_page.png)

- View a small statistics page of jobs run

![statistics](https://github.com/jaypyles/www-scrape/blob/master/docs/stats_page.png)

## Installation

1. Clone the repository:
28 changes: 25 additions & 3 deletions api/backend/app.py
@@ -3,6 +3,9 @@
import logging
from io import BytesIO
from openpyxl import Workbook
from typing import Any
from datetime import datetime
from bson import ObjectId

# PDM
from fastapi import BackgroundTasks, FastAPI, HTTPException
@@ -15,9 +18,16 @@
client = docker.from_env()

# LOCAL
from api.backend.job import query, insert, delete_jobs
from api.backend.job import (
average_elements_per_link,
get_jobs_per_day,
query,
insert,
delete_jobs,
)
from api.backend.models import (
DownloadJob,
GetStatistics,
SubmitScrapeJob,
DeleteScrapeJobs,
RetrieveScrapeJobs,
@@ -64,7 +74,8 @@ async def submit_scrape_job(job: SubmitScrapeJob, background_tasks: BackgroundTa
job.id = uuid.uuid4().hex

if job.user:
await insert(jsonable_encoder(job))
job_dict = job.model_dump()
await insert(job_dict)

return JSONResponse(content=f"Job queued for scraping: {job.id}")
except Exception as e:
@@ -76,7 +87,7 @@ async def retrieve_scrape_jobs(retrieve: RetrieveScrapeJobs):
LOG.info(f"Retrieving jobs for account: {retrieve.user}")
try:
results = await query({"user": retrieve.user})
return JSONResponse(content=results[::-1])
return JSONResponse(content=jsonable_encoder(results[::-1]))
except Exception as e:
LOG.error(f"Exception occurred: {e}")
return JSONResponse(content={"error": str(e)}, status_code=500)
@@ -184,3 +195,14 @@ def log_generator():
return StreamingResponse(log_generator(), media_type="text/event-stream")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/statistics/get-average-element-per-link")
async def get_average_element_per_link(get_statistics: GetStatistics):
return await average_elements_per_link(get_statistics.user)


@app.post("/api/statistics/get-average-jobs-per-day")
async def average_jobs_per_day(get_statistics: GetStatistics):
data = await get_jobs_per_day(get_statistics.user)
return data
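
For reference, a minimal sketch of calling the two new statistics endpoints from Python. The base URL and user value are assumptions for illustration, not taken from this commit; both endpoints accept a JSON body matching the GetStatistics model added below.

# Hypothetical client-side usage of the new statistics endpoints.
import requests  # third-party HTTP client, assumed available

BASE_URL = "http://localhost:8000"  # assumed host/port; adjust to your deployment
payload = {"user": "user@example.com"}  # shape matches the GetStatistics model

avg_elements = requests.post(
    f"{BASE_URL}/api/statistics/get-average-element-per-link", json=payload
).json()
jobs_per_day = requests.post(
    f"{BASE_URL}/api/statistics/get-average-jobs-per-day", json=payload
).json()

print(avg_elements)  # e.g. [{"date": "2024-07-21", "average_elements": 2.0, "count": 3}]
print(jobs_per_day)  # e.g. [{"date": "2024-07-21", "job_count": 3}]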
59 changes: 59 additions & 0 deletions api/backend/job.py
@@ -49,3 +49,62 @@ async def delete_jobs(jobs: list[str]):
LOG.info(f"RESULT: {result.deleted_count} documents deleted")

return True if result.deleted_count > 0 else False


async def average_elements_per_link(user: str):
collection = get_job_collection()
pipeline = [
{"$match": {"status": "Completed", "user": user}},
{
"$project": {
"date": {
"$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"}
},
"num_elements": {"$size": "$elements"},
}
},
{
"$group": {
"_id": "$date",
"average_elements": {"$avg": "$num_elements"},
"count": {"$sum": 1},
}
},
{"$sort": {"_id": 1}},
]
cursor = collection.aggregate(pipeline)
results: list[dict[str, Any]] = []

async for document in cursor:
results.append(
{
"date": document["_id"],
"average_elements": document["average_elements"],
"count": document["count"],
}
)

return results


async def get_jobs_per_day(user: str):
collection = get_job_collection()
pipeline = [
{"$match": {"status": "Completed", "user": user}},
{
"$project": {
"date": {
"$dateToString": {"format": "%Y-%m-%d", "date": "$time_created"}
}
}
},
{"$group": {"_id": "$date", "job_count": {"$sum": 1}}},
{"$sort": {"_id": 1}},
]
cursor = collection.aggregate(pipeline)

results: list[dict[str, Any]] = []
async for document in cursor:
results.append({"date": document["_id"], "job_count": document["job_count"]})

return results
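
Both helpers group completed jobs by the calendar day of time_created via $dateToString and aggregate server-side in MongoDB. A pure-Python sketch of what get_jobs_per_day computes, over hypothetical documents, may help build intuition for the pipeline:

# Intuition-only equivalent of the get_jobs_per_day pipeline; the real
# aggregation runs inside MongoDB, not in application code.
from collections import Counter
from datetime import datetime

docs = [  # hypothetical job documents; field names mirror the pipeline above
    {"status": "Completed", "user": "u1", "time_created": datetime(2024, 7, 20, 9, 5)},
    {"status": "Completed", "user": "u1", "time_created": datetime(2024, 7, 20, 17, 42)},
    {"status": "Completed", "user": "u1", "time_created": datetime(2024, 7, 21, 8, 0)},
]

counts = Counter(
    d["time_created"].strftime("%Y-%m-%d")  # same day-bucketing as $dateToString
    for d in docs
    if d["status"] == "Completed" and d["user"] == "u1"  # the $match stage
)
results = [{"date": day, "job_count": n} for day, n in sorted(counts.items())]
# [{"date": "2024-07-20", "job_count": 2}, {"date": "2024-07-21", "job_count": 1}]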
7 changes: 6 additions & 1 deletion api/backend/models.py
@@ -1,5 +1,6 @@
# STL
from typing import Any, Optional
from datetime import datetime

# PDM
import pydantic
@@ -27,7 +28,7 @@ class SubmitScrapeJob(pydantic.BaseModel):
url: str
elements: list[Element]
user: Optional[str] = None
time_created: Optional[str] = None
time_created: Optional[datetime] = None
result: Optional[dict[str, Any]] = None
job_options: JobOptions
status: str = "Queued"
@@ -43,3 +44,7 @@ class DownloadJob(pydantic.BaseModel):

class DeleteScrapeJobs(pydantic.BaseModel):
ids: list[str]


class GetStatistics(pydantic.BaseModel):
user: str
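
Typing time_created as datetime rather than str means pydantic parses incoming ISO-8601 strings into real datetime objects, and the job.model_dump() call in app.py preserves them, so MongoDB stores BSON dates that the $dateToString stages above can consume. A small sketch of that effect, assuming pydantic v2 (consistent with the model_dump call in this commit):

# Demonstrates the time_created type change on a trimmed stand-in model;
# SubmitScrapeJobSketch is hypothetical, not the project's real class.
from datetime import datetime
from typing import Optional

import pydantic

class SubmitScrapeJobSketch(pydantic.BaseModel):
    time_created: Optional[datetime] = None

job = SubmitScrapeJobSketch(time_created="2024-07-21T12:30:00")
print(type(job.model_dump()["time_created"]))  # <class 'datetime.datetime'>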
2 changes: 1 addition & 1 deletion api/backend/worker/job_worker.py
@@ -41,7 +41,7 @@ async def main():
LOG.info("Starting job worker...")
while True:
await process_job()
await asyncio.sleep(5) # Sleep for 5 seconds before checking for new jobs
await asyncio.sleep(5)


if __name__ == "__main__":
Binary file modified docs/main_page.png
Binary file added docs/stats_page.png
17 changes: 17 additions & 0 deletions package-lock.json


1 change: 1 addition & 0 deletions package.json
@@ -16,6 +16,7 @@
"@testing-library/user-event": "^13.5.0",
"axios": "^1.7.2",
"bootstrap": "^5.3.0",
"chart.js": "^4.4.3",
"framer-motion": "^4.1.17",
"next": "^14.2.4",
"next-auth": "^4.24.7",
6 changes: 3 additions & 3 deletions src/components/JobTable.tsx
@@ -47,9 +47,9 @@ interface ColorMap {
}

const COLOR_MAP: ColorMap = {
Queued: "rgba(255,201,5,0.5)",
Scraping: "rgba(3,104,255,0.5)",
Completed: "rgba(5,255,51,0.5)",
Queued: "rgba(255,201,5,0.25)",
Scraping: "rgba(3,104,255,0.25)",
Completed: "rgba(5,255,51,0.25)",
};

const JobTable: React.FC<JobTableProps> = ({ jobs, fetchJobs }) => {
10 changes: 10 additions & 0 deletions src/components/NavDrawer.tsx
@@ -21,6 +21,7 @@ import HomeIcon from "@mui/icons-material/Home";
import HttpIcon from "@mui/icons-material/Http";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import TerminalIcon from "@mui/icons-material/Terminal";
import BarChart from "@mui/icons-material/BarChart";
import { useRouter } from "next/router";
import { useTheme } from "@mui/material/styles";

@@ -77,6 +78,15 @@ const NavDrawer: React.FC<NavDrawerProps> = ({ toggleTheme, isDarkMode }) => {
</ListItemButton>
</ListItem>
<Divider />
<ListItem>
<ListItemButton onClick={() => router.push("/statistics")}>
<ListItemIcon>
<BarChart />
</ListItemIcon>
<ListItemText primary="Statistics" />
</ListItemButton>
</ListItem>
<Divider />
<ListItem>
<ListItemButton onClick={() => router.push("/logs")}>
<ListItemIcon>