Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

URL reputation module #878

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions artemis/modules/url_reputation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import requests
from bs4 import BeautifulSoup
import re
import time
from urllib.parse import urlparse
from karton.core import Task
from artemis.binds import Service, TaskStatus, TaskType
from artemis.module_base import ArtemisBase
from artemis.task_utils import get_target_url

class URLReputation(ArtemisBase):
    """
    Crawls a target HTTP service for outbound links and checks each discovered
    external URL against the URLhaus (abuse.ch) reputation API, reporting any
    URLs flagged as malicious.
    """

    identity = "url_reputation"
    # Consume HTTP service tasks produced by the port/service scanners.
    filters = [
        {"type": TaskType.SERVICE.value, "service": Service.HTTP.value},
    ]

    def remove_duplicates(self, link_list):
        """Return the unique http(s) URLs found in *link_list*, preserving first-seen order."""
        unique_links = []
        for item in link_list:
            match = re.search(r"(?P<url>https?://[^\s]+)", item)
            if match is not None and match.group("url") not in unique_links:
                unique_links.append(match.group("url"))
        return unique_links

    def check_url_status(self, url):
        """
        Query the URLhaus API for *url*.

        Returns True if URLhaus knows the URL and reports a threat entry for it;
        False otherwise (including on API or network errors, which are logged).
        """
        api_endpoint = "https://urlhaus-api.abuse.ch/v1/url/"
        try:
            response = requests.post(api_endpoint, data={"url": url})
        except requests.RequestException as e:
            self.log.warning(f"API request failed for {url}: {e}")
            return False

        if response.status_code == 200:
            data = response.json()
            return data.get("query_status") == "ok" and "threat" in data

        self.log.warning(f"API request failed for {url}")
        return False

    def extract_and_check_urls(self, url, hostname, max_links=162):
        """
        Breadth-first crawl starting at *url*, restricted to pages on *hostname*.

        Links pointing to *hostname* are followed; links pointing elsewhere are
        collected (up to *max_links*) and returned as candidates for reputation
        checking. Iterative traversal avoids the unbounded recursion of a
        naive crawler.
        """
        visited = set()
        external_urls = []
        to_visit = [url]

        while to_visit and len(external_urls) < max_links:
            current = to_visit.pop(0)
            if current in visited or hostname not in current:
                continue
            visited.add(current)

            try:
                source_code = requests.get(current)
            except requests.RequestException as e:
                self.log.warning(f"Failed to fetch {current}: {e}")
                continue

            soup = BeautifulSoup(source_code.content, "lxml")
            for link in soup.find_all("a", href=True):
                href = str(link.get("href"))
                # Resolve relative links against the crawled hostname.
                if len(urlparse(href).netloc) == 0:
                    href = "http://" + hostname + "/" + href
                if hostname in href:
                    if href not in visited:
                        to_visit.append(href)
                else:
                    external_urls.append(href)
                    if len(external_urls) >= max_links:
                        break

        return external_urls

    def run(self, task: Task) -> None:
        target = get_target_url(task)
        self.log.info(f"URL Reputation module running on {target}")

        hostname = urlparse(target).hostname or target
        urls = self.remove_duplicates(self.extract_and_check_urls(target, hostname))
        # Actually perform the reputation check for every collected URL.
        malicious_urls = [url for url in urls if self.check_url_status(url)]

        if malicious_urls:
            # On the default task result view only the interesting task results
            # will be displayed - a hit in URLhaus is interesting.
            status = TaskStatus.INTERESTING
            status_reason = "Found URLs with bad reputation: " + ", ".join(malicious_urls)
        else:
            status = TaskStatus.OK
            status_reason = "No malicious URLs found"

        self.db.save_task_result(
            task=task,
            status=status,
            status_reason=status_reason,
            # In the data dictionary, you may provide any additional results - the user will be able to view them
            # in the interface on the single task result page.
            data={"malicious_urls": malicious_urls, "checked_urls": urls},
        )



# Entry point: start the karton consumer loop when this module is executed
# directly (python3 -m artemis.modules.url_reputation).
if __name__ == "__main__":
    URLReputation().loop()


10 changes: 10 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,16 @@ services:
restart: always
volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"]

karton-url_reputation:
build:
context: .
dockerfile: docker/Dockerfile
command: "python3 -m artemis.modules.url_reputation"
depends_on: [karton-system]
env_file: .env
restart: always
volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"]

karton-ftp_bruter:
build:
context: .
Expand Down