diff --git a/api/backend/job/__init__.py b/api/backend/job/__init__.py new file mode 100644 index 0000000..2dfc67e --- /dev/null +++ b/api/backend/job/__init__.py @@ -0,0 +1,19 @@ +from .job import ( + query, + insert, + update_job, + delete_jobs, + get_jobs_per_day, + get_queued_job, + average_elements_per_link, +) + +__all__ = [ + "query", + "insert", + "update_job", + "delete_jobs", + "get_jobs_per_day", + "get_queued_job", + "average_elements_per_link", +] diff --git a/api/backend/job.py b/api/backend/job/job.py similarity index 98% rename from api/backend/job.py rename to api/backend/job/job.py index 5d550b4..1688d0f 100644 --- a/api/backend/job.py +++ b/api/backend/job/job.py @@ -6,8 +6,8 @@ from pymongo import DESCENDING # LOCAL -from api.backend.models import FetchOptions from api.backend.database import get_job_collection +from api.backend.job.models.job_options import FetchOptions LOG = logging.getLogger(__name__) diff --git a/api/backend/job/models/__init__.py b/api/backend/job/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/backend/job/models/job_options.py b/api/backend/job/models/job_options.py new file mode 100644 index 0000000..9e4b936 --- /dev/null +++ b/api/backend/job/models/job_options.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel +from typing import Any, Optional +from api.backend.job.models.site_map import SiteMap + + +class FetchOptions(BaseModel): + chat: Optional[bool] = None + + +class JobOptions(BaseModel): + multi_page_scrape: bool = False + custom_headers: dict[str, Any] = {} + proxies: list[str] = [] + site_map: Optional[SiteMap] = None diff --git a/api/backend/job/models/site_map.py b/api/backend/job/models/site_map.py new file mode 100644 index 0000000..c0e581f --- /dev/null +++ b/api/backend/job/models/site_map.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel +from typing import Literal + + +class Action(BaseModel): + type: Literal["click", "input"] + xpath: str + name: str + input: str = "" + do_once: bool = True + + +class SiteMap(BaseModel): + actions: list[Action] diff --git a/api/backend/job/scraping/scraping_utils.py b/api/backend/job/scraping/scraping_utils.py new file mode 100644 index 0000000..44fdedf --- /dev/null +++ b/api/backend/job/scraping/scraping_utils.py @@ -0,0 +1,30 @@ +import time +from typing import cast + +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + + +def scrape_content(driver: webdriver.Chrome, pages: set[tuple[str, str]]): + _ = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + + last_height = cast(str, driver.execute_script("return document.body.scrollHeight")) + while True: + driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + + time.sleep(3) # Wait for the page to load + new_height = cast( + str, driver.execute_script("return document.body.scrollHeight") + ) + + if new_height == last_height: + break + + last_height = new_height + + pages.add((driver.page_source, driver.current_url)) + return driver.page_source diff --git a/api/backend/job/site_mapping/__init__.py b/api/backend/job/site_mapping/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/backend/job/site_mapping/site_mapping.py b/api/backend/job/site_mapping/site_mapping.py new file mode 100644 index 0000000..94ff4b1 --- /dev/null +++ b/api/backend/job/site_mapping/site_mapping.py @@ -0,0 +1,94 @@ +from api.backend.job.models.site_map import Action, SiteMap +from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException +from selenium.webdriver.common.by import By +from typing import Any +import logging +import time +from copy import deepcopy + +from api.backend.job.scraping.scraping_utils import scrape_content +from selenium.webdriver.support.ui import WebDriverWait +from seleniumwire.inspect import TimeoutException +from seleniumwire.webdriver import Chrome +from selenium.webdriver.support import expected_conditions as EC + +LOG = logging.getLogger(__name__) + + +def clear_done_actions(site_map: dict[str, Any]): + """Clear all actions that have been clicked.""" + cleared_site_map = deepcopy(site_map) + + cleared_site_map["actions"] = [ + action for action in cleared_site_map["actions"] if not action["do_once"] + ] + + return cleared_site_map + + +def handle_input(action: Action, driver: webdriver.Chrome): + try: + element = WebDriverWait(driver, 10).until( + EC.element_to_be_clickable((By.XPATH, action.xpath)) + ) + LOG.info(f"Sending keys: {action.input} to element: {element}") + + element.send_keys(action.input) + + except NoSuchElementException: + LOG.info(f"Element not found: {action.xpath}") + return False + + except TimeoutException: + LOG.info(f"Timeout waiting for element: {action.xpath}") + return False + + except Exception as e: + LOG.info(f"Error handling input: {e}") + return False + + return True + + +def handle_click(action: Action, driver: webdriver.Chrome): + try: + element = driver.find_element(By.XPATH, action.xpath) + LOG.info(f"Clicking element: {element}") + + element.click() + + except NoSuchElementException: + LOG.info(f"Element not found: {action.xpath}") + return False + + return True + + +ACTION_MAP = { + "click": handle_click, + "input": handle_input, +} + + +async def handle_site_mapping( + site_map_dict: dict[str, Any], + driver: Chrome, + pages: set[tuple[str, str]], +): + site_map = SiteMap(**site_map_dict) + LOG.info(f"Handling site map: {site_map}") + + for action in site_map.actions: + action_handler = ACTION_MAP[action.type] + if not action_handler(action, driver): + return + + time.sleep(2) + + _ = scrape_content(driver, pages) + + cleared_site_map_dict = clear_done_actions(site_map_dict) + + if cleared_site_map_dict["actions"]: + await handle_site_mapping(cleared_site_map_dict, driver, pages) diff --git a/api/backend/models.py b/api/backend/models.py index fb10d70..b52a704 100644 --- a/api/backend/models.py +++ b/api/backend/models.py @@ -2,12 +2,14 @@ from typing import Any, Optional, Union from datetime import datetime +# LOCAL +from api.backend.job.models.job_options import JobOptions + # PDM import pydantic -class FetchOptions(pydantic.BaseModel): - chat: Optional[bool] = None + class Element(pydantic.BaseModel): @@ -22,12 +24,6 @@ class CapturedElement(pydantic.BaseModel): name: str -class JobOptions(pydantic.BaseModel): - multi_page_scrape: bool = False - custom_headers: Optional[dict[str, Any]] = {} - proxies: Optional[list[str]] = [] - - class RetrieveScrapeJobs(pydantic.BaseModel): user: str diff --git a/api/backend/routers/job_router.py b/api/backend/routers/job_router.py index d5884ca..2c12ac9 100644 --- a/api/backend/routers/job_router.py +++ b/api/backend/routers/job_router.py @@ -12,22 +12,17 @@ from fastapi.responses import JSONResponse, StreamingResponse # LOCAL -from api.backend.job import ( - query, - insert, - update_job, - delete_jobs, -) +from api.backend.job import query, insert, update_job, delete_jobs from api.backend.models import ( UpdateJobs, DownloadJob, - FetchOptions, DeleteScrapeJobs, Job, ) from api.backend.schemas import User from api.backend.auth.auth_utils import get_current_user from api.backend.utils import clean_text +from api.backend.job.models.job_options import FetchOptions LOG = logging.getLogger(__name__) diff --git a/api/backend/scraping.py b/api/backend/scraping.py index 9418403..c0c1dce 100644 --- a/api/backend/scraping.py +++ b/api/backend/scraping.py @@ -1,19 +1,20 @@ import logging from typing import Any, Optional -import time import random from bs4 import BeautifulSoup from lxml import etree from seleniumwire import webdriver -from lxml.etree import _Element # type: ignore [reportPrivateImport] +from lxml.etree import _Element # pyright: ignore [reportPrivateUsage] from fake_useragent import UserAgent -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.chrome.options import Options as ChromeOptions from urllib.parse import urlparse, urljoin from api.backend.models import Element, CapturedElement +from api.backend.job.site_mapping.site_mapping import ( + handle_site_mapping, +) +from api.backend.job.scraping.scraping_utils import scrape_content +from api.backend.job.models.site_map import SiteMap LOG = logging.getLogger(__name__) @@ -95,6 +96,7 @@ async def make_site_request( pages: set[tuple[str, str]] = set(), original_url: str = "", proxies: Optional[list[str]] = [], + site_map: Optional[dict[str, Any]] = None, ) -> None: """Make basic `GET` request to site using Selenium.""" # Check if URL has already been visited @@ -114,27 +116,16 @@ async def make_site_request( final_url = driver.current_url visited_urls.add(url) visited_urls.add(final_url) - _ = WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.TAG_NAME, "body")) - ) - last_height = driver.execute_script("return document.body.scrollHeight") - while True: - driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + page_source = scrape_content(driver, pages) - time.sleep(3) # Wait for the page to load - new_height = driver.execute_script("return document.body.scrollHeight") - - if new_height == last_height: - break - - last_height = new_height - - final_height = driver.execute_script("return document.body.scrollHeight") - - page_source = driver.page_source - LOG.debug(f"Page source for url: {url}\n{page_source}") - pages.add((page_source, final_url)) + if site_map: + LOG.info("Site map: %s", site_map) + _ = await handle_site_mapping( + site_map, + driver, + pages, + ) finally: driver.quit() @@ -192,6 +183,7 @@ async def scrape( headers: Optional[dict[str, Any]], multi_page_scrape: bool = False, proxies: Optional[list[str]] = [], + site_map: Optional[SiteMap] = None, ): visited_urls: set[str] = set() pages: set[tuple[str, str]] = set() @@ -204,6 +196,7 @@ async def scrape( pages=pages, original_url=url, proxies=proxies, + site_map=site_map, ) elements: list[dict[str, dict[str, list[CapturedElement]]]] = list() diff --git a/api/backend/worker/job_worker.py b/api/backend/worker/job_worker.py index 6ae5c16..13ff3ff 100644 --- a/api/backend/worker/job_worker.py +++ b/api/backend/worker/job_worker.py @@ -24,6 +24,7 @@ async def process_job(): job["job_options"]["custom_headers"], job["job_options"]["multi_page_scrape"], job["job_options"]["proxies"], + job["job_options"]["site_map"], ) LOG.info( f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}" diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 8c2d902..97cce17 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -10,5 +10,8 @@ services: - "$PWD/package-lock.json:/app/package-lock.json" - "$PWD/tsconfig.json:/app/tsconfig.json" scraperr_api: + environment: + - LOG_LEVEL=INFO volumes: - "$PWD/api:/project/api" + - "$PWD/scraping:/project/scraping" diff --git a/src/components/jobs/JobQueue.tsx b/src/components/jobs/JobQueue.tsx index 899f2e5..be770c4 100644 --- a/src/components/jobs/JobQueue.tsx +++ b/src/components/jobs/JobQueue.tsx @@ -15,6 +15,7 @@ import { Button, Tooltip, IconButton, + TableContainer, } from "@mui/material"; import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; import StarIcon from "@mui/icons-material/Star"; @@ -52,145 +53,147 @@ export const JobQueue = ({ const router = useRouter(); return ( - - - - Select - Id - Url - Elements - Result - Time Created - Status - Actions - - - - {filteredJobs.map((row, index) => ( - - - onSelectJob(row.id)} - /> - - - { - router.push({ - pathname: "/chat", - query: { - job: row.id, - }, - }); - }} - > - - - - - - - { - onFavorite([row.id], "favorite", !row.favorite); - row.favorite = !row.favorite; - }} - > - - - - - - - {row.id} - - - {row.url} - - - - {JSON.stringify(row.elements)} - - - - - } - aria-controls="panel1a-content" - id="panel1a-header" - sx={{ - minHeight: 0, - "&.Mui-expanded": { minHeight: 0 }, - }} - > - +
+ + + Select + Id + Url + Elements + Result + Time Created + Status + Actions + + + + {filteredJobs.map((row, index) => ( + + + onSelectJob(row.id)} + /> + + + { + router.push({ + pathname: "/chat", + query: { + job: row.id, + }, + }); + }} + > + + + + + + + { + onFavorite([row.id], "favorite", !row.favorite); + row.favorite = !row.favorite; + }} + > + + + + + + + {row.id} + + + {row.url} + + + + {JSON.stringify(row.elements)} + + + + + } + aria-controls="panel1a-content" + id="panel1a-header" sx={{ - maxHeight: 150, - overflow: "auto", - width: "100%", + minHeight: 0, + "&.Mui-expanded": { minHeight: 0 }, }} > - - Show Result - - - - - - - {JSON.stringify(row.result, null, 2)} - + + Show Result + + + + + + + {JSON.stringify(row.result, null, 2)} + + + + + + + + {new Date(row.time_created).toLocaleString()} + + + + + + {row.status} - - - - - - {new Date(row.time_created).toLocaleString()} - - - - - - {row.status} - - - - - - - - - - ))} - -
+ + + + + + + + + ))} + + + ); }; diff --git a/src/components/pages/home/home.tsx b/src/components/pages/home/home.tsx new file mode 100644 index 0000000..da5732f --- /dev/null +++ b/src/components/pages/home/home.tsx @@ -0,0 +1,107 @@ +"use client"; + +import React, { useState, useEffect, useRef } from "react"; +import { Button, Container, Box, Snackbar, Alert } from "@mui/material"; +import { useRouter } from "next/router"; +import { Element, Result } from "@/types"; +import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter"; +import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider"; + +export const Home = () => { + const { + submittedURL, + setSubmittedURL, + rows, + setRows, + results, + snackbarOpen, + setSnackbarOpen, + snackbarMessage, + snackbarSeverity, + } = useJobSubmitterProvider(); + const router = useRouter(); + const { elements, url } = router.query; + + const resultsRef = useRef(null); + + useEffect(() => { + if (elements) { + setRows(JSON.parse(elements as string)); + } + if (url) { + setSubmittedURL(url as string); + } + }, [elements, url]); + + useEffect(() => { + if (results && resultsRef.current) { + resultsRef.current.scrollIntoView({ behavior: "smooth" }); + } + }, [results]); + + const handleCloseSnackbar = () => { + setSnackbarOpen(false); + }; + + const ErrorSnackbar = () => { + return ( + + + {snackbarMessage} + + + ); + }; + + const NotifySnackbar = () => { + const goTo = () => { + router.push("/jobs"); + }; + + const action = ( + + ); + + return ( + + + {snackbarMessage} + + + ); + }; + + return ( + + + + {submittedURL.length ? ( + + ) : null} + + {snackbarSeverity === "info" ? : } + + ); +}; diff --git a/src/components/pages/home/index.ts b/src/components/pages/home/index.ts new file mode 100644 index 0000000..84d36cd --- /dev/null +++ b/src/components/pages/home/index.ts @@ -0,0 +1 @@ +export * from "./home"; diff --git a/src/components/submit/index.ts b/src/components/submit/index.ts index 400e380..7ddcadf 100644 --- a/src/components/submit/index.ts +++ b/src/components/submit/index.ts @@ -1,2 +1 @@ -export * from "./ElementTable"; export * from "./job-submitter"; diff --git a/src/components/submit/ElementTable.tsx b/src/components/submit/job-submitter/element-table/element-table.tsx similarity index 95% rename from src/components/submit/ElementTable.tsx rename to src/components/submit/job-submitter/element-table/element-table.tsx index b29b0e3..d693346 100644 --- a/src/components/submit/ElementTable.tsx +++ b/src/components/submit/job-submitter/element-table/element-table.tsx @@ -15,9 +15,11 @@ import { IconButton, Tooltip, useTheme, + Divider, } from "@mui/material"; import AddIcon from "@mui/icons-material/Add"; -import { Element } from "../../types"; +import { Element } from "@/types"; +import { SiteMap } from "../site-map"; interface Props { rows: Element[]; @@ -169,6 +171,13 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => { + + ); }; diff --git a/src/components/submit/job-submitter/element-table/index.ts b/src/components/submit/job-submitter/element-table/index.ts new file mode 100644 index 0000000..c8f10cf --- /dev/null +++ b/src/components/submit/job-submitter/element-table/index.ts @@ -0,0 +1 @@ +export { ElementTable } from "./element-table"; diff --git a/src/components/submit/job-submitter/index.ts b/src/components/submit/job-submitter/index.ts index d1879c4..08d060e 100644 --- a/src/components/submit/job-submitter/index.ts +++ b/src/components/submit/job-submitter/index.ts @@ -1 +1,2 @@ export { JobSubmitter } from "./job-submitter"; +export { ElementTable } from "./element-table"; diff --git a/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx b/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx index cdbdfa3..5a29b51 100644 --- a/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx +++ b/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx @@ -1,26 +1,20 @@ -import React, { Dispatch } from "react"; +import React from "react"; import { TextField, Button, CircularProgress } from "@mui/material"; -import { Element } from "@/types"; +import { useJobSubmitterProvider } from "../provider"; export type JobSubmitterInputProps = { - submittedURL: string; - setSubmittedURL: Dispatch>; - isValidURL: boolean; urlError: string | null; handleSubmit: () => void; loading: boolean; - rows: Element[]; }; export const JobSubmitterInput = ({ - submittedURL, - setSubmittedURL, - isValidURL, - urlError, handleSubmit, loading, - rows, + urlError, }: JobSubmitterInputProps) => { + const { submittedURL, setSubmittedURL, isValidURL, rows } = + useJobSubmitterProvider(); return (
{ const handleMultiPageScrapeChange = () => { diff --git a/src/components/submit/job-submitter/job-submitter.tsx b/src/components/submit/job-submitter/job-submitter.tsx index 83b92e9..2a54a00 100644 --- a/src/components/submit/job-submitter/job-submitter.tsx +++ b/src/components/submit/job-submitter/job-submitter.tsx @@ -1,7 +1,6 @@ "use client"; -import React, { useEffect, useState, Dispatch } from "react"; -import { Element } from "@/types"; +import React, { useEffect, useState } from "react"; import { useAuth } from "@/contexts/AuthContext"; import { useRouter } from "next/router"; import { RawJobOptions } from "@/types/job"; @@ -10,21 +9,7 @@ import { JobSubmitterHeader } from "./job-submitter-header"; import { JobSubmitterInput } from "./job-submitter-input"; import { JobSubmitterOptions } from "./job-submitter-options"; import { ApiService } from "@/services"; - -interface StateProps { - submittedURL: string; - setSubmittedURL: Dispatch>; - rows: Element[]; - isValidURL: boolean; - setIsValidUrl: Dispatch>; - setSnackbarMessage: Dispatch>; - setSnackbarOpen: Dispatch>; - setSnackbarSeverity: Dispatch>; -} - -interface Props { - stateProps: StateProps; -} +import { useJobSubmitterProvider } from "./provider"; const initialJobOptions: RawJobOptions = { multi_page_scrape: false, @@ -32,7 +17,7 @@ const initialJobOptions: RawJobOptions = { proxies: null, }; -export const JobSubmitter = ({ stateProps }: Props) => { +export const JobSubmitter = () => { const { user } = useAuth(); const router = useRouter(); const { job_options } = router.query; @@ -40,11 +25,13 @@ export const JobSubmitter = ({ stateProps }: Props) => { const { submittedURL, rows, + siteMap, setIsValidUrl, setSnackbarMessage, setSnackbarOpen, setSnackbarSeverity, - } = stateProps; + setSiteMap, + } = useJobSubmitterProvider(); const [urlError, setUrlError] = useState(null); const [loading, setLoading] = useState(false); @@ -87,7 +74,8 @@ export const JobSubmitter = ({ stateProps }: Props) => { rows, user, jobOptions, - customHeaders + customHeaders, + siteMap ) .then(async (response) => { if (!response.ok) { @@ -120,31 +108,28 @@ export const JobSubmitter = ({ stateProps }: Props) => { job_options as string, setCustomJSONSelected, setProxiesSelected, - setJobOptions + setJobOptions, + setSiteMap ); } }, [job_options]); return ( - <> -
- - - -
- +
+ + + +
); }; diff --git a/src/components/submit/job-submitter/provider.tsx b/src/components/submit/job-submitter/provider.tsx new file mode 100644 index 0000000..f8a3fda --- /dev/null +++ b/src/components/submit/job-submitter/provider.tsx @@ -0,0 +1,84 @@ +import React, { + createContext, + PropsWithChildren, + useContext, + useState, + Dispatch, + useMemo, +} from "react"; +import { Element, Result, SiteMap } from "@/types"; + +type JobSubmitterProviderType = { + submittedURL: string; + setSubmittedURL: Dispatch>; + rows: Element[]; + setRows: Dispatch>; + results: Result; + setResults: Dispatch>; + snackbarOpen: boolean; + setSnackbarOpen: Dispatch>; + snackbarMessage: string; + setSnackbarMessage: Dispatch>; + snackbarSeverity: string; + setSnackbarSeverity: Dispatch>; + isValidURL: boolean; + setIsValidUrl: Dispatch>; + siteMap: SiteMap | null; + setSiteMap: Dispatch>; +}; + +const JobSubmitterProvider = createContext( + {} as JobSubmitterProviderType +); + +export const Provider = ({ children }: PropsWithChildren) => { + const [submittedURL, setSubmittedURL] = useState(""); + const [rows, setRows] = useState([]); + const [results, setResults] = useState({}); + const [snackbarOpen, setSnackbarOpen] = useState(false); + const [snackbarMessage, setSnackbarMessage] = useState(""); + const [snackbarSeverity, setSnackbarSeverity] = useState("error"); + const [isValidURL, setIsValidUrl] = useState(true); + const [siteMap, setSiteMap] = useState(null); + + const value: JobSubmitterProviderType = useMemo( + () => ({ + submittedURL, + setSubmittedURL, + rows, + setRows, + results, + setResults, + snackbarOpen, + setSnackbarOpen, + snackbarMessage, + setSnackbarMessage, + snackbarSeverity, + setSnackbarSeverity, + isValidURL, + setIsValidUrl, + siteMap, + setSiteMap, + }), + [ + submittedURL, + rows, + results, + snackbarOpen, + snackbarMessage, + snackbarSeverity, + isValidURL, + siteMap, + ] + ); + + return ( + + {children} + + ); +}; + +export const useJobSubmitterProvider = () => { + return useContext(JobSubmitterProvider); +}; diff --git a/src/components/submit/job-submitter/site-map/index.ts b/src/components/submit/job-submitter/site-map/index.ts new file mode 100644 index 0000000..0e74d0f --- /dev/null +++ b/src/components/submit/job-submitter/site-map/index.ts @@ -0,0 +1 @@ +export * from "./site-map"; diff --git a/src/components/submit/job-submitter/site-map/site-map-input/index.ts b/src/components/submit/job-submitter/site-map/site-map-input/index.ts new file mode 100644 index 0000000..31de220 --- /dev/null +++ b/src/components/submit/job-submitter/site-map/site-map-input/index.ts @@ -0,0 +1 @@ +export * from "./site-map-input"; diff --git a/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css new file mode 100644 index 0000000..b83bdc8 --- /dev/null +++ b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css @@ -0,0 +1,21 @@ +.button { + height: 3rem; + width: 2rem; + + color: #ffffff; + font-weight: 600; + border-radius: 0.375rem; + transition: transform 0.2s ease-in-out; + transform: scale(1); + &:hover { + transform: scale(1.05); + } +} + +.remove { + background-color: var(--delete-red) !important; +} + +.remove:hover { + background-color: var(--delete-red-hover) !important; +} diff --git a/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx new file mode 100644 index 0000000..43195bd --- /dev/null +++ b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx @@ -0,0 +1,135 @@ +import { useState } from "react"; +import { useJobSubmitterProvider } from "../../provider"; +import { + MenuItem, + Select, + TextField, + FormControl, + Button, + Checkbox, + FormControlLabel, +} from "@mui/material"; +import { ActionOption } from "@/types/job"; +import classes from "./site-map-input.module.css"; +import { clsx } from "clsx"; + +export type SiteMapInputProps = { + disabled?: boolean; + xpath?: string; + option?: ActionOption; + clickOnce?: boolean; + input?: string; +}; + +export const SiteMapInput = ({ + disabled, + xpath, + option, + clickOnce, + input, +}: SiteMapInputProps) => { + console.log(clickOnce); + const [optionState, setOptionState] = useState( + option || "click" + ); + const [xpathState, setXpathState] = useState(xpath || ""); + const [clickOnceState, setClickOnceState] = useState( + clickOnce || false + ); + const [inputState, setInputState] = useState(input || ""); + + const { siteMap, setSiteMap } = useJobSubmitterProvider(); + + const handleAdd = () => { + if (!siteMap) return; + + console.log(optionState, xpathState, clickOnceState, inputState); + + setSiteMap((prevSiteMap) => ({ + ...prevSiteMap, + actions: [ + { + type: optionState, + xpath: xpathState, + name: "", + do_once: clickOnceState, + input: inputState, + }, + ...(prevSiteMap?.actions || []), + ], + })); + + setXpathState(""); + }; + + const handleRemove = () => { + if (!siteMap) return; + + setSiteMap((prevSiteMap) => ({ + ...prevSiteMap, + actions: (prevSiteMap?.actions || []).slice(0, -1), + })); + }; + + return ( +
+
+ + + + {optionState === "input" && ( + setInputState(e.target.value)} + disabled={disabled} + /> + )} + setXpathState(e.target.value)} + disabled={disabled} + /> + {disabled ? ( + + ) : ( + + )} +
+ {!disabled && ( + setClickOnceState(!clickOnceState)} + /> + } + /> + )} +
+ ); +}; diff --git a/src/components/submit/job-submitter/site-map/site-map.tsx b/src/components/submit/job-submitter/site-map/site-map.tsx new file mode 100644 index 0000000..d301edf --- /dev/null +++ b/src/components/submit/job-submitter/site-map/site-map.tsx @@ -0,0 +1,70 @@ +import { useEffect, useState } from "react"; +import { useJobSubmitterProvider } from "../provider"; +import { Button, Divider, Typography, useTheme } from "@mui/material"; +import { SiteMapInput } from "./site-map-input"; + +export const SiteMap = () => { + const { siteMap, setSiteMap } = useJobSubmitterProvider(); + const [showSiteMap, setShowSiteMap] = useState(false); + const theme = useTheme(); + + const handleCreateSiteMap = () => { + setSiteMap({ actions: [] }); + setShowSiteMap(true); + }; + + const handleClearSiteMap = () => { + setSiteMap(null); + setShowSiteMap(false); + }; + + useEffect(() => { + if (siteMap) { + setShowSiteMap(true); + } + }, [siteMap]); + + return ( +
+ {siteMap ? ( + + ) : ( + + )} + {showSiteMap && ( +
+ + {siteMap?.actions && siteMap?.actions.length > 0 && ( + <> + + + Site Map Actions + + + )} +
    + {siteMap?.actions.reverse().map((action, index) => ( +
  • + + Action {index + 1}: + + +
  • + ))} +
+
+ )} +
+ ); +}; diff --git a/src/lib/helpers/parse-job-options.ts b/src/lib/helpers/parse-job-options.ts index e5c22bc..cb27d09 100644 --- a/src/lib/helpers/parse-job-options.ts +++ b/src/lib/helpers/parse-job-options.ts @@ -1,15 +1,17 @@ import { Dispatch, SetStateAction } from "react"; -import { RawJobOptions } from "@/types"; +import { RawJobOptions, SiteMap } from "@/types"; export const parseJobOptions = ( job_options: string, setCustomJSONSelected: Dispatch>, setProxiesSelected: Dispatch>, - setJobOptions: Dispatch> + setJobOptions: Dispatch>, + setSiteMap: Dispatch> ) => { if (job_options) { const jsonOptions = JSON.parse(job_options as string); + console.log(jsonOptions); const newJobOptions: RawJobOptions = { multi_page_scrape: false, custom_headers: null, @@ -31,6 +33,10 @@ export const parseJobOptions = ( newJobOptions.proxies = jsonOptions.proxies.join(","); } + if (jsonOptions.site_map) { + setSiteMap(jsonOptions.site_map); + } + setJobOptions(newJobOptions); } }; diff --git a/src/pages/index.tsx b/src/pages/index.tsx index 34ad12f..c7bbc8e 100644 --- a/src/pages/index.tsx +++ b/src/pages/index.tsx @@ -1,117 +1,10 @@ -"use client"; - -import React, { useState, useEffect, useRef } from "react"; -import { Button, Container, Box, Snackbar, Alert } from "@mui/material"; -import { useRouter } from "next/router"; -import { Element, Result } from "@/types"; -import { ElementTable } from "@/components/submit"; -import { JobSubmitter } from "@/components/submit/job-submitter"; - -const Home = () => { - const router = useRouter(); - const { elements, url } = router.query; - - const [submittedURL, setSubmittedURL] = useState(""); - const [rows, setRows] = useState([]); - const [results, setResults] = useState({}); - const [snackbarOpen, setSnackbarOpen] = useState(false); - const [snackbarMessage, setSnackbarMessage] = useState(""); - const [snackbarSeverity, setSnackbarSeverity] = useState("error"); - const [isValidURL, setIsValidUrl] = useState(true); - - const resultsRef = useRef(null); - - useEffect(() => { - if (elements) { - setRows(JSON.parse(elements as string)); - } - if (url) { - setSubmittedURL(url as string); - } - }, [elements, url]); - - useEffect(() => { - if (results && resultsRef.current) { - resultsRef.current.scrollIntoView({ behavior: "smooth" }); - } - }, [results]); - - const handleCloseSnackbar = () => { - setSnackbarOpen(false); - }; - - const ErrorSnackbar = () => { - return ( - - - {snackbarMessage} - - - ); - }; - - const NotifySnackbar = () => { - const goTo = () => { - router.push("/jobs"); - }; - - const action = ( - - ); - - return ( - - - {snackbarMessage} - - - ); - }; +import { Provider as JobSubmitterProvider } from "@/components/submit/job-submitter/provider"; +import { Home } from "@/components/pages/home/home"; +export default function Main() { return ( - - - - {submittedURL.length ? ( - - ) : null} - - {snackbarSeverity === "info" ? : } - + + + ); -}; - -export default Home; +} diff --git a/src/services/api-service/functions/submit-job.ts b/src/services/api-service/functions/submit-job.ts index fc36aa7..e009e22 100644 --- a/src/services/api-service/functions/submit-job.ts +++ b/src/services/api-service/functions/submit-job.ts @@ -1,9 +1,12 @@ +import { SiteMap } from "@/types/job"; + export const submitJob = async ( submittedURL: string, rows: any[], user: any, jobOptions: any, - customHeaders: any + customHeaders: any, + siteMap: SiteMap | null ) => { return await fetch(`/api/submit-scrape-job`, { method: "POST", @@ -18,6 +21,7 @@ export const submitJob = async ( ...jobOptions, custom_headers: customHeaders || {}, proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [], + site_map: siteMap, }, }, }), diff --git a/src/styles/globals.css b/src/styles/globals.css index 033d23e..eb3c657 100644 --- a/src/styles/globals.css +++ b/src/styles/globals.css @@ -2,6 +2,11 @@ @tailwind components; @tailwind utilities; +:root { + --delete-red: #ef4444; + --delete-red-hover: #ff6969; +} + #__next { height: 100%; } diff --git a/src/styles/themes.ts b/src/styles/themes.ts index acc8284..f314084 100644 --- a/src/styles/themes.ts +++ b/src/styles/themes.ts @@ -34,6 +34,12 @@ const commonThemeOptions = { h4: { fontWeight: 500, }, + h5: { + fontWeight: 500, + }, + h6: { + fontWeight: 500, + }, body1: { fontFamily: '"Schibsted Grotesk", sans-serif', }, @@ -175,6 +181,9 @@ const darkTheme = createTheme({ h5: { color: "#ffffff", }, + h6: { + color: "#ffffff", + }, body1: { ...commonThemeOptions.typography.body1, color: "#ffffff", diff --git a/src/types/job.ts b/src/types/job.ts index 93bc381..785eb93 100644 --- a/src/types/job.ts +++ b/src/types/job.ts @@ -16,6 +16,7 @@ export type JobOptions = { multi_page_scrape: boolean; custom_headers: null | string; proxies: string[]; + site_map?: SiteMap; }; export type RawJobOptions = { @@ -23,3 +24,17 @@ export type RawJobOptions = { custom_headers: string | null; proxies: string | null; }; + +export type ActionOption = "click" | "input"; + +export type Action = { + type: ActionOption; + xpath: string; + name: string; + do_once?: boolean; + input?: string; +}; + +export type SiteMap = { + actions: Action[]; +};