diff --git a/api/backend/job/__init__.py b/api/backend/job/__init__.py
new file mode 100644
index 0000000..2dfc67e
--- /dev/null
+++ b/api/backend/job/__init__.py
@@ -0,0 +1,19 @@
+from .job import (
+ query,
+ insert,
+ update_job,
+ delete_jobs,
+ get_jobs_per_day,
+ get_queued_job,
+ average_elements_per_link,
+)
+
+__all__ = [
+ "query",
+ "insert",
+ "update_job",
+ "delete_jobs",
+ "get_jobs_per_day",
+ "get_queued_job",
+ "average_elements_per_link",
+]
diff --git a/api/backend/job.py b/api/backend/job/job.py
similarity index 98%
rename from api/backend/job.py
rename to api/backend/job/job.py
index 5d550b4..1688d0f 100644
--- a/api/backend/job.py
+++ b/api/backend/job/job.py
@@ -6,8 +6,8 @@
from pymongo import DESCENDING
# LOCAL
-from api.backend.models import FetchOptions
from api.backend.database import get_job_collection
+from api.backend.job.models.job_options import FetchOptions
LOG = logging.getLogger(__name__)
diff --git a/api/backend/job/models/__init__.py b/api/backend/job/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/backend/job/models/job_options.py b/api/backend/job/models/job_options.py
new file mode 100644
index 0000000..9e4b936
--- /dev/null
+++ b/api/backend/job/models/job_options.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel
+from typing import Any, Optional
+from api.backend.job.models.site_map import SiteMap
+
+
+class FetchOptions(BaseModel):
+ chat: Optional[bool] = None
+
+
+class JobOptions(BaseModel):
+ multi_page_scrape: bool = False
+ custom_headers: dict[str, Any] = {}
+ proxies: list[str] = []
+ site_map: Optional[SiteMap] = None
diff --git a/api/backend/job/models/site_map.py b/api/backend/job/models/site_map.py
new file mode 100644
index 0000000..c0e581f
--- /dev/null
+++ b/api/backend/job/models/site_map.py
@@ -0,0 +1,14 @@
+from pydantic import BaseModel
+from typing import Literal
+
+
+class Action(BaseModel):
+ type: Literal["click", "input"]
+ xpath: str
+ name: str
+ input: str = ""
+ do_once: bool = True
+
+
+class SiteMap(BaseModel):
+ actions: list[Action]
diff --git a/api/backend/job/scraping/scraping_utils.py b/api/backend/job/scraping/scraping_utils.py
new file mode 100644
index 0000000..44fdedf
--- /dev/null
+++ b/api/backend/job/scraping/scraping_utils.py
@@ -0,0 +1,30 @@
+import time
+from typing import cast
+
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+
+
+def scrape_content(driver: webdriver.Chrome, pages: set[tuple[str, str]]):
+ _ = WebDriverWait(driver, 10).until(
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
+ )
+
+    last_height = cast(int, driver.execute_script("return document.body.scrollHeight"))
+ while True:
+ driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+
+ time.sleep(3) # Wait for the page to load
+        new_height = cast(
+            int, driver.execute_script("return document.body.scrollHeight")
+        )
+
+ if new_height == last_height:
+ break
+
+ last_height = new_height
+
+ pages.add((driver.page_source, driver.current_url))
+ return driver.page_source
diff --git a/api/backend/job/site_mapping/__init__.py b/api/backend/job/site_mapping/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/api/backend/job/site_mapping/site_mapping.py b/api/backend/job/site_mapping/site_mapping.py
new file mode 100644
index 0000000..94ff4b1
--- /dev/null
+++ b/api/backend/job/site_mapping/site_mapping.py
@@ -0,0 +1,94 @@
+from api.backend.job.models.site_map import Action, SiteMap
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+from typing import Any
+import logging
+import time
+from copy import deepcopy
+
+from api.backend.job.scraping.scraping_utils import scrape_content
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.common.exceptions import TimeoutException
+from seleniumwire.webdriver import Chrome
+from selenium.webdriver.support import expected_conditions as EC
+
+LOG = logging.getLogger(__name__)
+
+
+def clear_done_actions(site_map: dict[str, Any]):
+ """Clear all actions that have been clicked."""
+ cleared_site_map = deepcopy(site_map)
+
+ cleared_site_map["actions"] = [
+ action for action in cleared_site_map["actions"] if not action["do_once"]
+ ]
+
+ return cleared_site_map
+
+
+def handle_input(action: Action, driver: webdriver.Chrome):
+ try:
+ element = WebDriverWait(driver, 10).until(
+ EC.element_to_be_clickable((By.XPATH, action.xpath))
+ )
+ LOG.info(f"Sending keys: {action.input} to element: {element}")
+
+ element.send_keys(action.input)
+
+ except NoSuchElementException:
+ LOG.info(f"Element not found: {action.xpath}")
+ return False
+
+ except TimeoutException:
+ LOG.info(f"Timeout waiting for element: {action.xpath}")
+ return False
+
+ except Exception as e:
+ LOG.info(f"Error handling input: {e}")
+ return False
+
+ return True
+
+
+def handle_click(action: Action, driver: webdriver.Chrome):
+ try:
+ element = driver.find_element(By.XPATH, action.xpath)
+ LOG.info(f"Clicking element: {element}")
+
+ element.click()
+
+ except NoSuchElementException:
+ LOG.info(f"Element not found: {action.xpath}")
+ return False
+
+ return True
+
+
+ACTION_MAP = {
+ "click": handle_click,
+ "input": handle_input,
+}
+
+
+async def handle_site_mapping(
+ site_map_dict: dict[str, Any],
+ driver: Chrome,
+ pages: set[tuple[str, str]],
+):
+ site_map = SiteMap(**site_map_dict)
+ LOG.info(f"Handling site map: {site_map}")
+
+ for action in site_map.actions:
+ action_handler = ACTION_MAP[action.type]
+ if not action_handler(action, driver):
+ return
+
+ time.sleep(2)
+
+ _ = scrape_content(driver, pages)
+
+ cleared_site_map_dict = clear_done_actions(site_map_dict)
+
+ if cleared_site_map_dict["actions"]:
+ await handle_site_mapping(cleared_site_map_dict, driver, pages)
diff --git a/api/backend/models.py b/api/backend/models.py
index fb10d70..b52a704 100644
--- a/api/backend/models.py
+++ b/api/backend/models.py
@@ -2,12 +2,14 @@
from typing import Any, Optional, Union
from datetime import datetime
+# LOCAL
+from api.backend.job.models.job_options import JobOptions
+
# PDM
import pydantic
-class FetchOptions(pydantic.BaseModel):
- chat: Optional[bool] = None
+
class Element(pydantic.BaseModel):
@@ -22,12 +24,6 @@ class CapturedElement(pydantic.BaseModel):
name: str
-class JobOptions(pydantic.BaseModel):
- multi_page_scrape: bool = False
- custom_headers: Optional[dict[str, Any]] = {}
- proxies: Optional[list[str]] = []
-
-
class RetrieveScrapeJobs(pydantic.BaseModel):
user: str
diff --git a/api/backend/routers/job_router.py b/api/backend/routers/job_router.py
index d5884ca..2c12ac9 100644
--- a/api/backend/routers/job_router.py
+++ b/api/backend/routers/job_router.py
@@ -12,22 +12,17 @@
from fastapi.responses import JSONResponse, StreamingResponse
# LOCAL
-from api.backend.job import (
- query,
- insert,
- update_job,
- delete_jobs,
-)
+from api.backend.job import query, insert, update_job, delete_jobs
from api.backend.models import (
UpdateJobs,
DownloadJob,
- FetchOptions,
DeleteScrapeJobs,
Job,
)
from api.backend.schemas import User
from api.backend.auth.auth_utils import get_current_user
from api.backend.utils import clean_text
+from api.backend.job.models.job_options import FetchOptions
LOG = logging.getLogger(__name__)
diff --git a/api/backend/scraping.py b/api/backend/scraping.py
index 9418403..c0c1dce 100644
--- a/api/backend/scraping.py
+++ b/api/backend/scraping.py
@@ -1,19 +1,20 @@
import logging
from typing import Any, Optional
-import time
import random
from bs4 import BeautifulSoup
from lxml import etree
from seleniumwire import webdriver
-from lxml.etree import _Element # type: ignore [reportPrivateImport]
+from lxml.etree import _Element # pyright: ignore [reportPrivateUsage]
from fake_useragent import UserAgent
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options as ChromeOptions
from urllib.parse import urlparse, urljoin
from api.backend.models import Element, CapturedElement
+from api.backend.job.site_mapping.site_mapping import (
+ handle_site_mapping,
+)
+from api.backend.job.scraping.scraping_utils import scrape_content
+from api.backend.job.models.site_map import SiteMap
LOG = logging.getLogger(__name__)
@@ -95,6 +96,7 @@ async def make_site_request(
pages: set[tuple[str, str]] = set(),
original_url: str = "",
proxies: Optional[list[str]] = [],
+ site_map: Optional[dict[str, Any]] = None,
) -> None:
"""Make basic `GET` request to site using Selenium."""
# Check if URL has already been visited
@@ -114,27 +116,16 @@ async def make_site_request(
final_url = driver.current_url
visited_urls.add(url)
visited_urls.add(final_url)
- _ = WebDriverWait(driver, 10).until(
- EC.presence_of_element_located((By.TAG_NAME, "body"))
- )
- last_height = driver.execute_script("return document.body.scrollHeight")
- while True:
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+ page_source = scrape_content(driver, pages)
- time.sleep(3) # Wait for the page to load
- new_height = driver.execute_script("return document.body.scrollHeight")
-
- if new_height == last_height:
- break
-
- last_height = new_height
-
- final_height = driver.execute_script("return document.body.scrollHeight")
-
- page_source = driver.page_source
- LOG.debug(f"Page source for url: {url}\n{page_source}")
- pages.add((page_source, final_url))
+ if site_map:
+ LOG.info("Site map: %s", site_map)
+ _ = await handle_site_mapping(
+ site_map,
+ driver,
+ pages,
+ )
finally:
driver.quit()
@@ -192,6 +183,7 @@ async def scrape(
headers: Optional[dict[str, Any]],
multi_page_scrape: bool = False,
proxies: Optional[list[str]] = [],
+    site_map: Optional[dict[str, Any]] = None,
):
visited_urls: set[str] = set()
pages: set[tuple[str, str]] = set()
@@ -204,6 +196,7 @@ async def scrape(
pages=pages,
original_url=url,
proxies=proxies,
+ site_map=site_map,
)
elements: list[dict[str, dict[str, list[CapturedElement]]]] = list()
diff --git a/api/backend/worker/job_worker.py b/api/backend/worker/job_worker.py
index 6ae5c16..13ff3ff 100644
--- a/api/backend/worker/job_worker.py
+++ b/api/backend/worker/job_worker.py
@@ -24,6 +24,7 @@ async def process_job():
job["job_options"]["custom_headers"],
job["job_options"]["multi_page_scrape"],
job["job_options"]["proxies"],
+            job["job_options"].get("site_map"),
)
LOG.info(
f"Scraped result for url: {job['url']}, with elements: {job['elements']}\n{scraped}"
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 8c2d902..97cce17 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -10,5 +10,8 @@ services:
- "$PWD/package-lock.json:/app/package-lock.json"
- "$PWD/tsconfig.json:/app/tsconfig.json"
scraperr_api:
+ environment:
+ - LOG_LEVEL=INFO
volumes:
- "$PWD/api:/project/api"
+ - "$PWD/scraping:/project/scraping"
diff --git a/src/components/jobs/JobQueue.tsx b/src/components/jobs/JobQueue.tsx
index 899f2e5..be770c4 100644
--- a/src/components/jobs/JobQueue.tsx
+++ b/src/components/jobs/JobQueue.tsx
@@ -15,6 +15,7 @@ import {
Button,
Tooltip,
IconButton,
+ TableContainer,
} from "@mui/material";
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import StarIcon from "@mui/icons-material/Star";
@@ -52,145 +53,147 @@ export const JobQueue = ({
const router = useRouter();
return (
-
-
-
- Select
- Id
- Url
- Elements
- Result
- Time Created
- Status
- Actions
-
-
-
- {filteredJobs.map((row, index) => (
-
-
- onSelectJob(row.id)}
- />
-
-
- {
- router.push({
- pathname: "/chat",
- query: {
- job: row.id,
- },
- });
- }}
- >
-
-
-
-
-
-
- {
- onFavorite([row.id], "favorite", !row.favorite);
- row.favorite = !row.favorite;
- }}
- >
-
-
-
-
-
-
- {row.id}
-
-
- {row.url}
-
-
-
- {JSON.stringify(row.elements)}
-
-
-
-
- }
- aria-controls="panel1a-content"
- id="panel1a-header"
- sx={{
- minHeight: 0,
- "&.Mui-expanded": { minHeight: 0 },
- }}
- >
-
+
+
+
+ Select
+ Id
+ Url
+ Elements
+ Result
+ Time Created
+ Status
+ Actions
+
+
+
+ {filteredJobs.map((row, index) => (
+
+
+ onSelectJob(row.id)}
+ />
+
+
+ {
+ router.push({
+ pathname: "/chat",
+ query: {
+ job: row.id,
+ },
+ });
+ }}
+ >
+
+
+
+
+
+
+ {
+ onFavorite([row.id], "favorite", !row.favorite);
+ row.favorite = !row.favorite;
+ }}
+ >
+
+
+
+
+
+
+ {row.id}
+
+
+ {row.url}
+
+
+
+ {JSON.stringify(row.elements)}
+
+
+
+
+ }
+ aria-controls="panel1a-content"
+ id="panel1a-header"
sx={{
- maxHeight: 150,
- overflow: "auto",
- width: "100%",
+ minHeight: 0,
+ "&.Mui-expanded": { minHeight: 0 },
}}
>
-
- Show Result
-
-
-
-
-
-
- {JSON.stringify(row.result, null, 2)}
-
+
+ Show Result
+
+
+
+
+
+
+ {JSON.stringify(row.result, null, 2)}
+
+
+
+
+
+
+
+ {new Date(row.time_created).toLocaleString()}
+
+
+
+
+
+ {row.status}
-
-
-
-
-
- {new Date(row.time_created).toLocaleString()}
-
-
-
-
-
- {row.status}
-
-
-
-
-
-
-
-
-
- ))}
-
-
+
+
+
+
+
+
+
+
+ ))}
+
+
+
);
};
diff --git a/src/components/pages/home/home.tsx b/src/components/pages/home/home.tsx
new file mode 100644
index 0000000..da5732f
--- /dev/null
+++ b/src/components/pages/home/home.tsx
@@ -0,0 +1,107 @@
+"use client";
+
+import React, { useState, useEffect, useRef } from "react";
+import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
+import { useRouter } from "next/router";
+import { Element, Result } from "@/types";
+import { ElementTable, JobSubmitter } from "@/components/submit/job-submitter";
+import { useJobSubmitterProvider } from "@/components/submit/job-submitter/provider";
+
+export const Home = () => {
+ const {
+ submittedURL,
+ setSubmittedURL,
+ rows,
+ setRows,
+ results,
+ snackbarOpen,
+ setSnackbarOpen,
+ snackbarMessage,
+ snackbarSeverity,
+ } = useJobSubmitterProvider();
+ const router = useRouter();
+ const { elements, url } = router.query;
+
+ const resultsRef = useRef(null);
+
+ useEffect(() => {
+ if (elements) {
+ setRows(JSON.parse(elements as string));
+ }
+ if (url) {
+ setSubmittedURL(url as string);
+ }
+ }, [elements, url]);
+
+ useEffect(() => {
+ if (results && resultsRef.current) {
+ resultsRef.current.scrollIntoView({ behavior: "smooth" });
+ }
+ }, [results]);
+
+ const handleCloseSnackbar = () => {
+ setSnackbarOpen(false);
+ };
+
+ const ErrorSnackbar = () => {
+ return (
+
+
+ {snackbarMessage}
+
+
+ );
+ };
+
+ const NotifySnackbar = () => {
+ const goTo = () => {
+ router.push("/jobs");
+ };
+
+ const action = (
+
+ );
+
+ return (
+
+
+ {snackbarMessage}
+
+
+ );
+ };
+
+ return (
+
+
+
+ {submittedURL.length ? (
+
+ ) : null}
+
+ {snackbarSeverity === "info" ? : }
+
+ );
+};
diff --git a/src/components/pages/home/index.ts b/src/components/pages/home/index.ts
new file mode 100644
index 0000000..84d36cd
--- /dev/null
+++ b/src/components/pages/home/index.ts
@@ -0,0 +1 @@
+export * from "./home";
diff --git a/src/components/submit/index.ts b/src/components/submit/index.ts
index 400e380..7ddcadf 100644
--- a/src/components/submit/index.ts
+++ b/src/components/submit/index.ts
@@ -1,2 +1 @@
-export * from "./ElementTable";
export * from "./job-submitter";
diff --git a/src/components/submit/ElementTable.tsx b/src/components/submit/job-submitter/element-table/element-table.tsx
similarity index 95%
rename from src/components/submit/ElementTable.tsx
rename to src/components/submit/job-submitter/element-table/element-table.tsx
index b29b0e3..d693346 100644
--- a/src/components/submit/ElementTable.tsx
+++ b/src/components/submit/job-submitter/element-table/element-table.tsx
@@ -15,9 +15,11 @@ import {
IconButton,
Tooltip,
useTheme,
+ Divider,
} from "@mui/material";
import AddIcon from "@mui/icons-material/Add";
-import { Element } from "../../types";
+import { Element } from "@/types";
+import { SiteMap } from "../site-map";
interface Props {
rows: Element[];
@@ -169,6 +171,13 @@ export const ElementTable = ({ rows, setRows, submittedURL }: Props) => {
+
+
);
};
diff --git a/src/components/submit/job-submitter/element-table/index.ts b/src/components/submit/job-submitter/element-table/index.ts
new file mode 100644
index 0000000..c8f10cf
--- /dev/null
+++ b/src/components/submit/job-submitter/element-table/index.ts
@@ -0,0 +1 @@
+export { ElementTable } from "./element-table";
diff --git a/src/components/submit/job-submitter/index.ts b/src/components/submit/job-submitter/index.ts
index d1879c4..08d060e 100644
--- a/src/components/submit/job-submitter/index.ts
+++ b/src/components/submit/job-submitter/index.ts
@@ -1 +1,2 @@
export { JobSubmitter } from "./job-submitter";
+export { ElementTable } from "./element-table";
diff --git a/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx b/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx
index cdbdfa3..5a29b51 100644
--- a/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx
+++ b/src/components/submit/job-submitter/job-submitter-input/job-submitter-input.tsx
@@ -1,26 +1,20 @@
-import React, { Dispatch } from "react";
+import React from "react";
import { TextField, Button, CircularProgress } from "@mui/material";
-import { Element } from "@/types";
+import { useJobSubmitterProvider } from "../provider";
export type JobSubmitterInputProps = {
- submittedURL: string;
- setSubmittedURL: Dispatch>;
- isValidURL: boolean;
urlError: string | null;
handleSubmit: () => void;
loading: boolean;
- rows: Element[];
};
export const JobSubmitterInput = ({
- submittedURL,
- setSubmittedURL,
- isValidURL,
- urlError,
handleSubmit,
loading,
- rows,
+ urlError,
}: JobSubmitterInputProps) => {
+ const { submittedURL, setSubmittedURL, isValidURL, rows } =
+ useJobSubmitterProvider();
return (
{
const handleMultiPageScrapeChange = () => {
diff --git a/src/components/submit/job-submitter/job-submitter.tsx b/src/components/submit/job-submitter/job-submitter.tsx
index 83b92e9..2a54a00 100644
--- a/src/components/submit/job-submitter/job-submitter.tsx
+++ b/src/components/submit/job-submitter/job-submitter.tsx
@@ -1,7 +1,6 @@
"use client";
-import React, { useEffect, useState, Dispatch } from "react";
-import { Element } from "@/types";
+import React, { useEffect, useState } from "react";
import { useAuth } from "@/contexts/AuthContext";
import { useRouter } from "next/router";
import { RawJobOptions } from "@/types/job";
@@ -10,21 +9,7 @@ import { JobSubmitterHeader } from "./job-submitter-header";
import { JobSubmitterInput } from "./job-submitter-input";
import { JobSubmitterOptions } from "./job-submitter-options";
import { ApiService } from "@/services";
-
-interface StateProps {
- submittedURL: string;
- setSubmittedURL: Dispatch>;
- rows: Element[];
- isValidURL: boolean;
- setIsValidUrl: Dispatch>;
- setSnackbarMessage: Dispatch>;
- setSnackbarOpen: Dispatch>;
- setSnackbarSeverity: Dispatch>;
-}
-
-interface Props {
- stateProps: StateProps;
-}
+import { useJobSubmitterProvider } from "./provider";
const initialJobOptions: RawJobOptions = {
multi_page_scrape: false,
@@ -32,7 +17,7 @@ const initialJobOptions: RawJobOptions = {
proxies: null,
};
-export const JobSubmitter = ({ stateProps }: Props) => {
+export const JobSubmitter = () => {
const { user } = useAuth();
const router = useRouter();
const { job_options } = router.query;
@@ -40,11 +25,13 @@ export const JobSubmitter = ({ stateProps }: Props) => {
const {
submittedURL,
rows,
+ siteMap,
setIsValidUrl,
setSnackbarMessage,
setSnackbarOpen,
setSnackbarSeverity,
- } = stateProps;
+ setSiteMap,
+ } = useJobSubmitterProvider();
const [urlError, setUrlError] = useState(null);
const [loading, setLoading] = useState(false);
@@ -87,7 +74,8 @@ export const JobSubmitter = ({ stateProps }: Props) => {
rows,
user,
jobOptions,
- customHeaders
+ customHeaders,
+ siteMap
)
.then(async (response) => {
if (!response.ok) {
@@ -120,31 +108,28 @@ export const JobSubmitter = ({ stateProps }: Props) => {
job_options as string,
setCustomJSONSelected,
setProxiesSelected,
- setJobOptions
+ setJobOptions,
+ setSiteMap
);
}
}, [job_options]);
return (
- <>
-
-
-
-
-
- >
+
+
+
+
+
);
};
diff --git a/src/components/submit/job-submitter/provider.tsx b/src/components/submit/job-submitter/provider.tsx
new file mode 100644
index 0000000..f8a3fda
--- /dev/null
+++ b/src/components/submit/job-submitter/provider.tsx
@@ -0,0 +1,84 @@
+import React, {
+ createContext,
+ PropsWithChildren,
+ useContext,
+ useState,
+ Dispatch,
+ useMemo,
+} from "react";
+import { Element, Result, SiteMap } from "@/types";
+
+type JobSubmitterProviderType = {
+ submittedURL: string;
+ setSubmittedURL: Dispatch>;
+ rows: Element[];
+ setRows: Dispatch>;
+ results: Result;
+ setResults: Dispatch>;
+ snackbarOpen: boolean;
+ setSnackbarOpen: Dispatch>;
+ snackbarMessage: string;
+ setSnackbarMessage: Dispatch>;
+ snackbarSeverity: string;
+ setSnackbarSeverity: Dispatch>;
+ isValidURL: boolean;
+ setIsValidUrl: Dispatch>;
+ siteMap: SiteMap | null;
+ setSiteMap: Dispatch>;
+};
+
+const JobSubmitterProvider = createContext(
+ {} as JobSubmitterProviderType
+);
+
+export const Provider = ({ children }: PropsWithChildren) => {
+ const [submittedURL, setSubmittedURL] = useState("");
+ const [rows, setRows] = useState([]);
+ const [results, setResults] = useState({});
+ const [snackbarOpen, setSnackbarOpen] = useState(false);
+ const [snackbarMessage, setSnackbarMessage] = useState("");
+ const [snackbarSeverity, setSnackbarSeverity] = useState("error");
+ const [isValidURL, setIsValidUrl] = useState(true);
+ const [siteMap, setSiteMap] = useState(null);
+
+ const value: JobSubmitterProviderType = useMemo(
+ () => ({
+ submittedURL,
+ setSubmittedURL,
+ rows,
+ setRows,
+ results,
+ setResults,
+ snackbarOpen,
+ setSnackbarOpen,
+ snackbarMessage,
+ setSnackbarMessage,
+ snackbarSeverity,
+ setSnackbarSeverity,
+ isValidURL,
+ setIsValidUrl,
+ siteMap,
+ setSiteMap,
+ }),
+ [
+ submittedURL,
+ rows,
+ results,
+ snackbarOpen,
+ snackbarMessage,
+ snackbarSeverity,
+ isValidURL,
+ siteMap,
+ ]
+ );
+
+ return (
+
+ {children}
+
+ );
+};
+
+export const useJobSubmitterProvider = () => {
+ return useContext(JobSubmitterProvider);
+};
diff --git a/src/components/submit/job-submitter/site-map/index.ts b/src/components/submit/job-submitter/site-map/index.ts
new file mode 100644
index 0000000..0e74d0f
--- /dev/null
+++ b/src/components/submit/job-submitter/site-map/index.ts
@@ -0,0 +1 @@
+export * from "./site-map";
diff --git a/src/components/submit/job-submitter/site-map/site-map-input/index.ts b/src/components/submit/job-submitter/site-map/site-map-input/index.ts
new file mode 100644
index 0000000..31de220
--- /dev/null
+++ b/src/components/submit/job-submitter/site-map/site-map-input/index.ts
@@ -0,0 +1 @@
+export * from "./site-map-input";
diff --git a/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css
new file mode 100644
index 0000000..b83bdc8
--- /dev/null
+++ b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.module.css
@@ -0,0 +1,21 @@
+.button {
+ height: 3rem;
+ width: 2rem;
+
+ color: #ffffff;
+ font-weight: 600;
+ border-radius: 0.375rem;
+ transition: transform 0.2s ease-in-out;
+ transform: scale(1);
+ &:hover {
+ transform: scale(1.05);
+ }
+}
+
+.remove {
+ background-color: var(--delete-red) !important;
+}
+
+.remove:hover {
+ background-color: var(--delete-red-hover) !important;
+}
diff --git a/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx
new file mode 100644
index 0000000..43195bd
--- /dev/null
+++ b/src/components/submit/job-submitter/site-map/site-map-input/site-map-input.tsx
@@ -0,0 +1,135 @@
+import { useState } from "react";
+import { useJobSubmitterProvider } from "../../provider";
+import {
+ MenuItem,
+ Select,
+ TextField,
+ FormControl,
+ Button,
+ Checkbox,
+ FormControlLabel,
+} from "@mui/material";
+import { ActionOption } from "@/types/job";
+import classes from "./site-map-input.module.css";
+import { clsx } from "clsx";
+
+export type SiteMapInputProps = {
+ disabled?: boolean;
+ xpath?: string;
+ option?: ActionOption;
+ clickOnce?: boolean;
+ input?: string;
+};
+
+export const SiteMapInput = ({
+ disabled,
+ xpath,
+ option,
+ clickOnce,
+ input,
+}: SiteMapInputProps) => {
+
+ const [optionState, setOptionState] = useState(
+ option || "click"
+ );
+ const [xpathState, setXpathState] = useState(xpath || "");
+ const [clickOnceState, setClickOnceState] = useState(
+ clickOnce || false
+ );
+ const [inputState, setInputState] = useState(input || "");
+
+ const { siteMap, setSiteMap } = useJobSubmitterProvider();
+
+ const handleAdd = () => {
+ if (!siteMap) return;
+
+
+
+ setSiteMap((prevSiteMap) => ({
+ ...prevSiteMap,
+ actions: [
+ {
+ type: optionState,
+ xpath: xpathState,
+ name: "",
+ do_once: clickOnceState,
+ input: inputState,
+ },
+ ...(prevSiteMap?.actions || []),
+ ],
+ }));
+
+ setXpathState("");
+ };
+
+ const handleRemove = () => {
+ if (!siteMap) return;
+
+ setSiteMap((prevSiteMap) => ({
+ ...prevSiteMap,
+ actions: (prevSiteMap?.actions || []).slice(0, -1),
+ }));
+ };
+
+ return (
+
+
+
+
+
+ {optionState === "input" && (
+ setInputState(e.target.value)}
+ disabled={disabled}
+ />
+ )}
+ setXpathState(e.target.value)}
+ disabled={disabled}
+ />
+ {disabled ? (
+
+ ) : (
+
+ )}
+
+ {!disabled && (
+
setClickOnceState(!clickOnceState)}
+ />
+ }
+ />
+ )}
+
+ );
+};
diff --git a/src/components/submit/job-submitter/site-map/site-map.tsx b/src/components/submit/job-submitter/site-map/site-map.tsx
new file mode 100644
index 0000000..d301edf
--- /dev/null
+++ b/src/components/submit/job-submitter/site-map/site-map.tsx
@@ -0,0 +1,70 @@
+import { useEffect, useState } from "react";
+import { useJobSubmitterProvider } from "../provider";
+import { Button, Divider, Typography, useTheme } from "@mui/material";
+import { SiteMapInput } from "./site-map-input";
+
+export const SiteMap = () => {
+ const { siteMap, setSiteMap } = useJobSubmitterProvider();
+ const [showSiteMap, setShowSiteMap] = useState(false);
+ const theme = useTheme();
+
+ const handleCreateSiteMap = () => {
+ setSiteMap({ actions: [] });
+ setShowSiteMap(true);
+ };
+
+ const handleClearSiteMap = () => {
+ setSiteMap(null);
+ setShowSiteMap(false);
+ };
+
+ useEffect(() => {
+ if (siteMap) {
+ setShowSiteMap(true);
+ }
+ }, [siteMap]);
+
+ return (
+
+ {siteMap ? (
+
+ ) : (
+
+ )}
+ {showSiteMap && (
+
+
+ {siteMap?.actions && siteMap?.actions.length > 0 && (
+ <>
+
+
+ Site Map Actions
+
+ >
+ )}
+
+        {siteMap?.actions.slice().reverse().map((action, index) => (
+ -
+
+ Action {index + 1}:
+
+
+
+ ))}
+
+
+ )}
+
+ );
+};
diff --git a/src/lib/helpers/parse-job-options.ts b/src/lib/helpers/parse-job-options.ts
index e5c22bc..cb27d09 100644
--- a/src/lib/helpers/parse-job-options.ts
+++ b/src/lib/helpers/parse-job-options.ts
@@ -1,15 +1,17 @@
import { Dispatch, SetStateAction } from "react";
-import { RawJobOptions } from "@/types";
+import { RawJobOptions, SiteMap } from "@/types";
export const parseJobOptions = (
job_options: string,
setCustomJSONSelected: Dispatch>,
setProxiesSelected: Dispatch>,
- setJobOptions: Dispatch>
+ setJobOptions: Dispatch>,
+ setSiteMap: Dispatch>
) => {
if (job_options) {
const jsonOptions = JSON.parse(job_options as string);
+
const newJobOptions: RawJobOptions = {
multi_page_scrape: false,
custom_headers: null,
@@ -31,6 +33,10 @@ export const parseJobOptions = (
newJobOptions.proxies = jsonOptions.proxies.join(",");
}
+ if (jsonOptions.site_map) {
+ setSiteMap(jsonOptions.site_map);
+ }
+
setJobOptions(newJobOptions);
}
};
diff --git a/src/pages/index.tsx b/src/pages/index.tsx
index 34ad12f..c7bbc8e 100644
--- a/src/pages/index.tsx
+++ b/src/pages/index.tsx
@@ -1,117 +1,10 @@
-"use client";
-
-import React, { useState, useEffect, useRef } from "react";
-import { Button, Container, Box, Snackbar, Alert } from "@mui/material";
-import { useRouter } from "next/router";
-import { Element, Result } from "@/types";
-import { ElementTable } from "@/components/submit";
-import { JobSubmitter } from "@/components/submit/job-submitter";
-
-const Home = () => {
- const router = useRouter();
- const { elements, url } = router.query;
-
- const [submittedURL, setSubmittedURL] = useState("");
- const [rows, setRows] = useState([]);
- const [results, setResults] = useState({});
- const [snackbarOpen, setSnackbarOpen] = useState(false);
- const [snackbarMessage, setSnackbarMessage] = useState("");
- const [snackbarSeverity, setSnackbarSeverity] = useState("error");
- const [isValidURL, setIsValidUrl] = useState(true);
-
- const resultsRef = useRef(null);
-
- useEffect(() => {
- if (elements) {
- setRows(JSON.parse(elements as string));
- }
- if (url) {
- setSubmittedURL(url as string);
- }
- }, [elements, url]);
-
- useEffect(() => {
- if (results && resultsRef.current) {
- resultsRef.current.scrollIntoView({ behavior: "smooth" });
- }
- }, [results]);
-
- const handleCloseSnackbar = () => {
- setSnackbarOpen(false);
- };
-
- const ErrorSnackbar = () => {
- return (
-
-
- {snackbarMessage}
-
-
- );
- };
-
- const NotifySnackbar = () => {
- const goTo = () => {
- router.push("/jobs");
- };
-
- const action = (
-
- );
-
- return (
-
-
- {snackbarMessage}
-
-
- );
- };
+import { Provider as JobSubmitterProvider } from "@/components/submit/job-submitter/provider";
+import { Home } from "@/components/pages/home/home";
+export default function Main() {
return (
-
-
-
- {submittedURL.length ? (
-
- ) : null}
-
- {snackbarSeverity === "info" ? : }
-
+
+
+
);
-};
-
-export default Home;
+}
diff --git a/src/services/api-service/functions/submit-job.ts b/src/services/api-service/functions/submit-job.ts
index fc36aa7..e009e22 100644
--- a/src/services/api-service/functions/submit-job.ts
+++ b/src/services/api-service/functions/submit-job.ts
@@ -1,9 +1,12 @@
+import { SiteMap } from "@/types/job";
+
export const submitJob = async (
submittedURL: string,
rows: any[],
user: any,
jobOptions: any,
- customHeaders: any
+ customHeaders: any,
+ siteMap: SiteMap | null
) => {
return await fetch(`/api/submit-scrape-job`, {
method: "POST",
@@ -18,6 +21,7 @@ export const submitJob = async (
...jobOptions,
custom_headers: customHeaders || {},
proxies: jobOptions.proxies ? jobOptions.proxies.split(",") : [],
+ site_map: siteMap,
},
},
}),
diff --git a/src/styles/globals.css b/src/styles/globals.css
index 033d23e..eb3c657 100644
--- a/src/styles/globals.css
+++ b/src/styles/globals.css
@@ -2,6 +2,11 @@
@tailwind components;
@tailwind utilities;
+:root {
+ --delete-red: #ef4444;
+ --delete-red-hover: #ff6969;
+}
+
#__next {
height: 100%;
}
diff --git a/src/styles/themes.ts b/src/styles/themes.ts
index acc8284..f314084 100644
--- a/src/styles/themes.ts
+++ b/src/styles/themes.ts
@@ -34,6 +34,12 @@ const commonThemeOptions = {
h4: {
fontWeight: 500,
},
+ h5: {
+ fontWeight: 500,
+ },
+ h6: {
+ fontWeight: 500,
+ },
body1: {
fontFamily: '"Schibsted Grotesk", sans-serif',
},
@@ -175,6 +181,9 @@ const darkTheme = createTheme({
h5: {
color: "#ffffff",
},
+ h6: {
+ color: "#ffffff",
+ },
body1: {
...commonThemeOptions.typography.body1,
color: "#ffffff",
diff --git a/src/types/job.ts b/src/types/job.ts
index 93bc381..785eb93 100644
--- a/src/types/job.ts
+++ b/src/types/job.ts
@@ -16,6 +16,7 @@ export type JobOptions = {
multi_page_scrape: boolean;
custom_headers: null | string;
proxies: string[];
+ site_map?: SiteMap;
};
export type RawJobOptions = {
@@ -23,3 +24,17 @@ export type RawJobOptions = {
custom_headers: string | null;
proxies: string | null;
};
+
+export type ActionOption = "click" | "input";
+
+export type Action = {
+ type: ActionOption;
+ xpath: string;
+ name: string;
+ do_once?: boolean;
+ input?: string;
+};
+
+export type SiteMap = {
+ actions: Action[];
+};