Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ollama container for LLM capabilities and enhanced employee profiling #13

Merged
merged 35 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1191322
Prueba Marcos Front-end
karhu-es Jan 1, 2024
17c1fdb
LLM
karhu-es Jan 31, 2024
64f6726
Merge branch '10-integration-of-ollama-llm-container' of https://gith…
karhu-es Jan 31, 2024
56f0f8a
FIX socialIcons Frontend
karhu-es Jan 31, 2024
1e796f1
Delete OPENAI API Key parameter
xampla Feb 6, 2024
baf16c5
Update utils.py
xampla Feb 6, 2024
f55dca5
Update ollama.py
xampla Feb 6, 2024
445602f
Update requirements.txt
xampla Feb 6, 2024
f28aa2e
Update views.py
xampla Feb 6, 2024
fcc1737
Update docker-compose.yml
xampla Feb 6, 2024
0e5261b
Update models.py
xampla Feb 6, 2024
7297e6c
Update people.py
xampla Feb 6, 2024
970dd8c
Update google_data.py
xampla Feb 6, 2024
bdc65c9
Update index.js
xampla Feb 6, 2024
c04ad6f
Update people.py
xampla Feb 6, 2024
7c5e22c
Update people.py
xampla Feb 6, 2024
7f736d3
Update people_analisys.py
xampla Feb 6, 2024
614d152
Update ollama.py
xampla Feb 6, 2024
fa375aa
Update settings.py
xampla Feb 6, 2024
e140edc
Update requirements.txt
xampla Feb 6, 2024
7123413
Update ollama.py
xampla Feb 6, 2024
cfb2029
Update people_analisys.py
xampla Feb 7, 2024
3adc7e6
Update google_data.py
xampla Feb 7, 2024
1e6ebc4
Update models.py
xampla Feb 7, 2024
75cba21
Update people.py
xampla Feb 7, 2024
bfff3ad
Update utils.py
xampla Feb 7, 2024
b07a890
Delete infohound_diagram.jpg
xampla Feb 7, 2024
ef3b68b
Add files via upload
xampla Feb 7, 2024
09d757e
Update README.md
xampla Feb 7, 2024
6c1f11c
Delete infohound_diagram.jpg
xampla Feb 7, 2024
ec0aea3
Add files via upload
xampla Feb 7, 2024
9066ec1
Add files via upload
xampla Feb 7, 2024
d6c8f26
Delete infohound_diagram.jpg
xampla Feb 7, 2024
845ada9
Add files via upload
xampla Feb 7, 2024
7772390
Update README.md
xampla Feb 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ services:
- db
- redis
- celery_worker
- ollama
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_DB=infohound_db
- REDIS_HOST=redis
- REDIS_PORT=6379
command: sh -c "python manage.py makemigrations infohound && python manage.py migrate && python manage.py runserver 0.0.0.0:8000"

celery_worker:
build:
context: .
Expand All @@ -35,10 +37,12 @@ services:
- REDIS_HOST=redis
- REDIS_PORT=6379
command: sh -c "celery -A infohound_project worker --loglevel=info"

redis:
image: redis:latest
ports:
- '6378:6379'

db:
image: postgres:12
volumes:
Expand All @@ -47,5 +51,21 @@ services:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_DB=infohound_db

ollama:
image: ollama/ollama:latest
ports:
- '11434:11434'
# Uncomment if you want to use GPU. More info: https://ollama.ai/blog/ollama-is-now-available-as-an-official-docker-image
#environment:
# - gpus=all
#deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
volumes:
postgres_data:

3 changes: 3 additions & 0 deletions infohound/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ class People(models.Model):
name = models.CharField(max_length=255)
phones = models.JSONField(default=list, null=True)
social_profiles = models.JSONField(default=list)
ocupation_summary = models.TextField(default="This profile doesn't have a description yet. You can use the profile analysis task to employ an AI-powered tool that examines the metadata and creates a description for you.")
raw_metadata = models.TextField (default=None)
url_img = models.TextField(default="https://static.thenounproject.com/png/994628-200.png")
source = models.CharField(max_length=255)
domain = models.ForeignKey(Domain, on_delete=models.CASCADE)

Expand Down
76 changes: 40 additions & 36 deletions infohound/static/infohound/js/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,34 +186,34 @@ function loadPeople() {
person_name = person.name.length == 0 ? "[Not found]" : person_name

card.innerHTML = `
<div class="row g-0">
<div class="col-md-4 d-flex align-items-center justify-content-center">
<svg xmlns="http://www.w3.org/2000/svg" width="80%" height="80%" fill="currentColor" class="bi bi-person-circle" viewBox="0 0 16 16">
<path d="M11 6a3 3 0 1 1-6 0 3 3 0 0 1 6 0z"/>
<path fill-rule="evenodd" d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm8-7a7 7 0 0 0-5.468 11.37C3.242 11.226 4.805 10 8 10s4.757 1.225 5.468 2.37A7 7 0 0 0 8 1z"/>
</svg>
<div class="card-body">
<div class="row">
<div class="col-md-3 p-1">
<img src="${person.url_img}" class="img-fluid float-left">
</div>
<div class="col-md-9">
<h5 class="card-title">${person_name}</h5>
<div class="d-flex align-items-center mb-2">
<i class="bi bi-envelope-fill me-2"></i>
<span class="me-3">${person.emails}</span>
<i class="bi bi-telephone-fill me-2"></i>
<span class="me-3">${person.phones}</span>
<i class="bi bi-key-fill me-2"></i>
<span class="me-3">${person.keys}</span>
<i class="bi bi-person-fill me-2"></i>
<span>${person.accounts}</span>
</div>
<div class="col-md-8">
<div class="card-body">
<h5 class="card-title">${person_name}</h5>
<div class="d-flex align-items-center mb-2">
<i class="bi bi-envelope-fill me-2"></i>
<span class="me-3">${person.emails}</span>
<i class="bi bi-telephone-fill me-2"></i>
<span class="me-3">${person.phones}</span>
<i class="bi bi-key-fill me-2"></i>
<span class="me-3">${person.keys}</span>
<i class="bi bi-person-fill me-2"></i>
<span>${person.accounts}</span>
</div>
<hr>
<div class="d-flex align-items-center justify-content-center">
${socialIcons}
</div>
<div class="personID d-none">${person.id}</div>
</div>
<div class="col-md-12">
<small>${person.ocupation_summary}</small>
</div>
<hr>
<div class="d-flex align-items-center justify-content-center">
${socialIcons}
</div>
<div class="personID d-none">${person.id}</div>
</div>
</div>
</div>
`;
col.appendChild(card)
cardContainer.append(col);
Expand Down Expand Up @@ -310,18 +310,14 @@ function loadTasks() {
"findEmailsTask", "findEmailsFromURLsTask", "findSocialProfilesByEmailTask"]
data.forEach(task => {
const card = document.createElement('div');
card.className = 'card shadow mb-3';
card.className = 'col-md-4 p-3';
b = `
<div class="col-1 d-flex justify-content-center align-items-center">
<button id="${task.id}" type="button" class="btn btn-primary task-executer">Execute</button>
</div>
<button id="${task.id}" type="button" class="btn btn-primary task-executer">Execute</button>
`;
pb = "";
if(task.state == "PENDING") {
b = `
<div class="col-1 d-flex justify-content-center align-items-center">
<button type="button" class="btn btn-info" disabled>${task.state}</button>
</div>
<button type="button" class="btn btn-info" disabled>${task.state}</button>
`
pb = `
<div class="progress" role="progressbar" aria-label="Animated striped example" aria-valuenow="100" aria-valuemin="0" aria-valuemax="100">
Expand Down Expand Up @@ -363,16 +359,24 @@ function loadTasks() {
}

card.innerHTML = `
<div class="card-body">
<div class="card shadow h-100">
<div class="card-body d-flex flex-column">
<div class="row">
<div class="col-11">
<div class="col-md-12">
${h5}
<p class="card-text">${task.description}</p>
${pb}
</div>
${b}
</div>
<div class="row flex-fill">
<div class="col-md-12 d-flex justify-content-end align-items-end">
${b}
</div>
</div>
<div class="col-md-12 pt-1">
${pb}
</div>
</div>
</div>
`;
if (task.type == "Retrieve") {
taskRetrievalContainer.appendChild(card);
Expand Down
18 changes: 15 additions & 3 deletions infohound/tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from infohound.tool.retriever_modules import domains,subdomains,urls,files,emails,people,dorks
from infohound.tool.analysis_modules import domain_analysis,email_analysis,files_analysis,usernames_analysis
from infohound.tool.analysis_modules import domain_analysis,email_analysis,files_analysis,usernames_analysis,people_analisys
from celery import shared_task
import trio
import importlib

# ------------------------------------- #
# ------------- RETRIEVAL ------------- #
# ------------------------------------- #

@shared_task(bind=True, name="get_whois_info")
def getWhoisInfoTask(self, domain):
Expand Down Expand Up @@ -50,9 +53,14 @@ def executeDorksTask(self, domain):
def findEmailsFromDorksTask(self, domain):
emails.findEmailsFromDorks(domain)

@shared_task(bind=True, name="find_people_from_google")
def findPeopleFromGoogleTask(self, domain):
    # Retrieval task: delegates to the people retriever module to discover
    # employee profiles for `domain` via Google search.
    people.findPeopleFromGoogle(domain)


# -------------ANALYSIS-------------
# ------------------------------------- #
# ------------- ANALYSIS -------------- #
# ------------------------------------- #

@shared_task(bind=True, name="subdomain_take_over_analysis")
def subdomainTakeOverAnalysisTask(self, domain):
domain_analysis.subdomainTakeOverAnalysis(domain)
Expand Down Expand Up @@ -89,6 +97,10 @@ def findRegisteredSitesTask(self, domain):
def checkBreachTask(self, domain):
email_analysis.checkBreach(domain)

@shared_task(bind=True, name="summarize_profile")
def summarize_profile(self, domain):
    # Analysis task: delegates to people_analisys, which uses the Ollama LLM
    # to generate occupation summaries for people of `domain`.
    people_analisys.summarize_profile(domain)

# --------------CUSTOM--------------

@shared_task(bind=True, name="custom_task")
Expand Down
24 changes: 24 additions & 0 deletions infohound/tool/ai_assistant/ollama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from ollama import Client
from infohound_project.settings import OLLAMA_URL,OLLAMA_MODEL

def check_or_pull_model(client):
    """Ensure the configured model is available locally, pulling it if absent.

    Compares OLLAMA_MODEL against the base name (tag stripped) of every model
    the Ollama server reports, and triggers a pull when none matches.
    """
    installed = (entry["name"].split(":")[0] for entry in client.list()["models"])
    if OLLAMA_MODEL not in installed:
        client.pull(OLLAMA_MODEL)

def ollama_flexible_prompt(in_prompt):
    """Send a free-form prompt to the configured Ollama instance.

    Returns the stripped response text, or None when the call fails or the
    reply carries no "response" field.
    """
    client = Client(host=OLLAMA_URL)
    check_or_pull_model(client)
    try:
        res = client.generate(model=OLLAMA_MODEL, prompt=in_prompt)
    except Exception as e:
        # Fix: previously `res` stayed unbound here, so the membership test
        # below raised NameError and masked the real connection error.
        print(f"Could not call Ollama instance: {e}")
        return None
    desc = None
    if "response" in res:
        desc = res["response"].strip()
    return desc
17 changes: 17 additions & 0 deletions infohound/tool/analysis_modules/people_analisys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import time
from infohound.models import People
from infohound.tool.ai_assistant import ollama

def summarize_profile(domain_id):
    """Fill in AI-generated occupation summaries for people of a domain.

    Selects People rows still carrying the placeholder description, asks the
    Ollama assistant to summarize their raw metadata, and saves the result.
    """
    queryset = People.objects.filter(domain_id=domain_id, ocupation_summary__contains="This profile doesn't have a description yet")

    for entry in queryset.iterator():
        try:
            summarize_prompt = "Summarize the ocupation of the person in just 150 words given the following data: "
            raw_data = entry.raw_metadata
            print ("Executing AI-Powered Profile Analisis of: " + entry.name)
            summary = ollama.ollama_flexible_prompt(summarize_prompt + raw_data)
            # Fix: ollama_flexible_prompt returns None on failure; saving that
            # would previously crash the "Summary: " concatenation and could
            # overwrite the placeholder with a null summary. Skip instead.
            if summary is not None:
                entry.ocupation_summary = summary
                print ("Summary: " + summary)
                entry.save()
        except Exception as e:
            print(f"Error inesperado: {str(e)}")
45 changes: 44 additions & 1 deletion infohound/tool/data_sources/google_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import requests
import json
import html
import time
import urllib.parse
import infohound.tool.infohound_utils as infohound_utils
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -50,6 +51,48 @@ def getUrls(query):
#- files
#- url

def discoverPeople (query):
    """Page through Google Custom Search results for `query` collecting people.

    Returns a list of (name, url, raw_item_json, image_url) tuples built from
    profile metatags of each result item. Stops on API error, after the first
    page without items, or once the start offset reaches 100.
    """
    print("Testing query: " + query)

    gathered_people = []
    total_results = 0
    total_gathered = 0
    offset = 1
    quota_exceeded = False
    has_more = True

    while has_more and offset < 100 and not quota_exceeded:
        params = {"key": API_KEY, "cx": ID, "start": offset, "q": query}
        response = requests.get("https://www.googleapis.com/customsearch/v1", params=params)
        data = json.loads(response.text)

        if "error" in data:
            # API error (typically quota): report the status and stop paging.
            print(data["error"]["status"])
            quota_exceeded = True
            continue

        if offset == 1:
            total_results = data["searchInformation"]["totalResults"]

        if "items" not in data:
            has_more = False
        else:
            for item in data["items"]:
                try:
                    # Key-access order matters: a missing key is reported by name.
                    profile_url = item["link"]
                    first_name = item["pagemap"]["metatags"][0]["profile:first_name"]
                    last_name = item["pagemap"]["metatags"][0]["profile:last_name"]
                    img_src = item["pagemap"]["cse_image"][0]["src"]
                    full_name = f"{first_name} {last_name}"
                    gathered_people.append((full_name, profile_url, json.dumps(item), img_src))
                    print("Added: " + full_name)
                    total_gathered = total_gathered + 1
                except KeyError as e:
                    print(f"Error: The key '{e.args[0]}' is not present in the results.")
                except Exception as e:
                    print(f"Unexpected error: {str(e)}")

        # Advance to the next results page; throttle to be gentle on the API.
        offset = offset + 10
        time.sleep(1)

    print("Found " + str(total_results) + " and added " + str(total_gathered))
    return gathered_people

def discoverEmails(domain):
emails = []
Expand Down Expand Up @@ -179,4 +222,4 @@ def discoverSocialMediaByDorks(domain,email):
return data




2 changes: 0 additions & 2 deletions infohound/tool/infohound_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ def extractSocialInfo(text):
if t is not None:
data.append(t.group(0))



# Twitter
regex = r"(http(s)?:\/\/)?([\w]+\.)?twitter\.com\/[^&\/?\"\%]*"
t = re.search(regex, text)
Expand Down
Loading