fix(scan): fix clocked and scheduled scan not working #182

Merged · 4 commits · Sep 4, 2024
28 changes: 28 additions & 0 deletions web/reNgine/common_func.py
@@ -1139,3 +1139,31 @@ def extract_columns(row, columns):
list: Extracted values from the specified columns.
"""
return [row[i] for i in columns]

def create_scan_object(host_id, engine_id, initiated_by_id=None):
'''
create task with pending status so that celery task will execute when
threads are free
Args:
host_id: int: id of Domain model
engine_id: int: id of EngineType model
initiated_by_id: int : id of User model (Optional)
'''
# get current time
current_scan_time = timezone.now()
# fetch engine and domain object
engine = EngineType.objects.get(pk=engine_id)
domain = Domain.objects.get(pk=host_id)
scan = ScanHistory()
scan.scan_status = INITIATED_TASK
scan.domain = domain
scan.scan_type = engine
scan.start_scan_date = current_scan_time
if initiated_by_id:
user = User.objects.get(pk=initiated_by_id)
scan.initiated_by = user
scan.save()
# save last scan date for domain model
domain.start_scan_date = current_scan_time
domain.save()
return scan.id
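
For context, a minimal sketch of how this new helper is intended to be called; everything except create_scan_object itself (the domain, engine and user objects) is an illustrative assumption, not part of this diff:

    # Create a pending ScanHistory row for a scheduled run; the Celery worker
    # picks it up later and flips it to RUNNING_TASK when execution starts.
    scan_history_id = create_scan_object(
        host_id=domain.id,
        engine_id=engine.id,
        initiated_by_id=user.id,  # optional; omit for system-triggered scans
    )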
16 changes: 12 additions & 4 deletions web/reNgine/settings.py
@@ -171,10 +171,6 @@
USE_L10N = True
USE_TZ = True

# Temporary fix for celery beat crash
# See https://github.com/yogeshojha/rengine/issues/971
DJANGO_CELERY_BEAT_TZ_AWARE = False

MEDIA_URL = '/media/'
FILE_UPLOAD_MAX_MEMORY_SIZE = 100000000
FILE_UPLOAD_PERMISSIONS = 0o644
@@ -264,6 +260,13 @@
'filename': 'celery.log',
'maxBytes': 1024 * 1024 * 100, # 100 mb
},
'celery_beat': {
'class': 'logging.handlers.RotatingFileHandler',
'formatter': 'simple',
'filename': 'celery_beat.log',
'maxBytes': 1024 * 1024 * 100, # 100 mb
'backupCount': 5,
},
},
'formatters': {
'default': {
@@ -328,6 +331,11 @@
'handlers': ['null'],
'propagate': False,
},
'django_celery_beat': {
'handlers': ['celery_beat', 'console'],
'level': 'DEBUG',
'propagate': True,
},
},
'root': {
'handlers': ['console'],
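
To illustrate what the new logging configuration buys: any logger in the django_celery_beat namespace now writes to celery_beat.log (rotating, 5 backups) and to the console at DEBUG level. A minimal sketch, assuming a child logger name for illustration (the exact name is not taken from this diff):

    import logging

    # Child loggers inherit the 'django_celery_beat' configuration above,
    # so this record ends up in celery_beat.log and on the console.
    logger = logging.getLogger('django_celery_beat.schedulers')
    logger.debug('Checking clocked and periodic schedules')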
226 changes: 121 additions & 105 deletions web/reNgine/tasks.py
@@ -63,6 +63,7 @@ def initiate_scan(
results_dir=RENGINE_RESULTS,
imported_subdomains=[],
out_of_scope_subdomains=[],
initiated_by_id=None,
url_filter=''):
"""Initiate a new scan.

@@ -74,134 +75,149 @@ def initiate_scan(
results_dir (str): Results directory.
imported_subdomains (list): Imported subdomains.
out_of_scope_subdomains (list): Out-of-scope subdomains.
url_filter (str): URL path. Default: ''
url_filter (str): URL path. Default: ''.
initiated_by_id (int): ID of the User initiating the scan (optional).
"""

if CELERY_REMOTE_DEBUG:
debug()

# Get scan history
scan = ScanHistory.objects.get(pk=scan_history_id)
logger.info('Initiating scan on celery')
scan = None
try:
# Get scan engine
engine_id = engine_id or scan.scan_type.id # scan history engine_id
engine = EngineType.objects.get(pk=engine_id)

# Get scan engine
engine_id = engine_id or scan.scan_type.id # scan history engine_id
engine = EngineType.objects.get(pk=engine_id)
# Get YAML config
config = yaml.safe_load(engine.yaml_configuration)
enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
gf_patterns = config.get(GF_PATTERNS, [])

# Get YAML config
config = yaml.safe_load(engine.yaml_configuration)
enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
gf_patterns = config.get(GF_PATTERNS, [])
# Get domain and set last_scan_date
domain = Domain.objects.get(pk=domain_id)
domain.last_scan_date = timezone.now()
domain.save()

# Get domain and set last_scan_date
domain = Domain.objects.get(pk=domain_id)
domain.last_scan_date = timezone.now()
domain.save()
# Get path filter
url_filter = url_filter.rstrip('/')

# Get path filter
url_filter = url_filter.rstrip('/')
# For a live scan, the ScanHistory id is passed in as scan_history_id,
# so there is no need to create a new ScanHistory object.

# Get or create ScanHistory() object
if scan_type == LIVE_SCAN: # immediate
if scan_type == SCHEDULED_SCAN: # scheduled
# we need to create scan_history object for each scheduled scan
scan_history_id = create_scan_object(
host_id=domain_id,
engine_id=engine_id,
initiated_by_id=initiated_by_id,
)
scan = ScanHistory.objects.get(pk=scan_history_id)
scan.scan_status = RUNNING_TASK
elif scan_type == SCHEDULED_SCAN: # scheduled
scan = ScanHistory()
scan.scan_status = INITIATED_TASK
scan.scan_type = engine
scan.celery_ids = [initiate_scan.request.id]
scan.domain = domain
scan.start_scan_date = timezone.now()
scan.tasks = engine.tasks
uuid_scan = uuid.uuid1()
scan.results_dir = f'{results_dir}/{domain.name}/scans/{uuid_scan}'
add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
if add_gf_patterns and is_iterable(gf_patterns):
scan.used_gf_patterns = ','.join(gf_patterns)
scan.save()

try:
scan.scan_type = engine
scan.celery_ids = [initiate_scan.request.id]
scan.domain = domain
scan.start_scan_date = timezone.now()
scan.tasks = engine.tasks
uuid_scan = uuid.uuid1()
scan.results_dir = f'{results_dir}/{domain.name}/scans/{uuid_scan}'
add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
if add_gf_patterns and is_iterable(gf_patterns):
scan.used_gf_patterns = ','.join(gf_patterns)
scan.save()

# Create scan results dir
os.makedirs(scan.results_dir, exist_ok=True)
except:
import traceback

traceback.print_exc()
raise

# Build task context
ctx = {
'scan_history_id': scan_history_id,
'engine_id': engine_id,
'domain_id': domain.id,
'results_dir': scan.results_dir,
'url_filter': url_filter,
'yaml_configuration': config,
'out_of_scope_subdomains': out_of_scope_subdomains
}
ctx_str = json.dumps(ctx, indent=2)

# Send start notif
logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
send_scan_notif.delay(
scan_history_id,
subscan_id=None,
engine_id=engine_id,
status=CELERY_TASK_STATUS_MAP[scan.scan_status])
# Build task context
ctx = {
'scan_history_id': scan_history_id,
'engine_id': engine_id,
'domain_id': domain.id,
'results_dir': scan.results_dir,
'url_filter': url_filter,
'yaml_configuration': config,
'out_of_scope_subdomains': out_of_scope_subdomains
}
ctx_str = json.dumps(ctx, indent=2)

# Send start notif
logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
send_scan_notif.delay(
scan_history_id,
subscan_id=None,
engine_id=engine_id,
status=CELERY_TASK_STATUS_MAP[scan.scan_status])

# Save imported subdomains in DB
save_imported_subdomains(imported_subdomains, ctx=ctx)

# Create initial subdomain in DB: make a copy of domain as a subdomain so
# that other tasks using subdomains can use it.
subdomain_name = domain.name
subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)

# Save imported subdomains in DB
save_imported_subdomains(imported_subdomains, ctx=ctx)

# Create initial subdomain in DB: make a copy of domain as a subdomain so
# that other tasks using subdomains can use it.
subdomain_name = domain.name
subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)
# If enable_http_crawl is set, create an initial root HTTP endpoint so that
# HTTP crawling can start somewhere
http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
endpoint, _ = save_endpoint(
http_url,
ctx=ctx,
crawl=enable_http_crawl,
is_default=True,
subdomain=subdomain
)

# If enable_http_crawl is set, create an initial root HTTP endpoint so that
# HTTP crawling can start somewhere
http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
endpoint, _ = save_endpoint(
http_url,
ctx=ctx,
crawl=enable_http_crawl,
is_default=True,
subdomain=subdomain
)
save_subdomain_metadata(subdomain, endpoint)

# Build Celery tasks, crafted according to the dependency graph below:
# subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
# osint vulnerability_scan
# osint dalfox xss scan
# screenshot
# waf_detection
workflow = chain(
group(
subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
osint.si(ctx=ctx, description='OS Intelligence')
),
port_scan.si(ctx=ctx, description='Port scan'),
fetch_url.si(ctx=ctx, description='Fetch URL'),
group(
dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
screenshot.si(ctx=ctx, description='Screenshot'),
waf_detection.si(ctx=ctx, description='WAF detection')
save_subdomain_metadata(subdomain, endpoint)


# Build Celery tasks, crafted according to the dependency graph below:
# subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
# osint vulnerability_scan
# osint dalfox xss scan
# screenshot
# waf_detection
workflow = chain(
group(
subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
osint.si(ctx=ctx, description='OS Intelligence')
),
port_scan.si(ctx=ctx, description='Port scan'),
fetch_url.si(ctx=ctx, description='Fetch URL'),
group(
dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
screenshot.si(ctx=ctx, description='Screenshot'),
waf_detection.si(ctx=ctx, description='WAF detection')
)
)
)

# Build callback
callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])
# Build callback
callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])

# Run Celery chord
logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
task = chain(workflow, callback).on_error(callback).delay()
scan.celery_ids.append(task.id)
scan.save()
# Run Celery chord
logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
task = chain(workflow, callback).on_error(callback).delay()
scan.celery_ids.append(task.id)
scan.save()

return {
'success': True,
'task_id': task.id
}
return {
'success': True,
'task_id': task.id
}

except Exception as e:
logger.exception(e)
if scan:
scan.scan_status = FAILED_TASK
scan.error_message = str(e)
scan.save()
return {
'success': False,
'error': str(e)
}

@app.task(name='initiate_subscan', bind=False, queue='subscan_queue')
def initiate_subscan(
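
To make the scheduled path concrete: with this change, a clocked or periodic job no longer needs a pre-created ScanHistory; passing scan_type=SCHEDULED_SCAN lets initiate_scan build one via create_scan_object(). The scheduler call site is not part of this diff, so the following is only a hedged sketch with illustrative values:

    # Hypothetical enqueue of a scheduled scan after this fix; domain, engine
    # and user stand in for whatever the clocked task resolves at run time.
    initiate_scan.apply_async(kwargs={
        'domain_id': domain.id,
        'engine_id': engine.id,
        'scan_type': SCHEDULED_SCAN,
        'initiated_by_id': user.id,
    })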
2 changes: 1 addition & 1 deletion web/startScan/templates/startScan/schedule_scan_list.html
@@ -48,7 +48,7 @@
Will run exactly at {{ task.clocked.clocked_time}} UTC
{% endif %}
</td>
<td>{{ task.last_run_at|none_or_never }}</td>
<td>{{ task.last_run_at|none_or_never }} UTC</td>
<td class="text-center">{{ task.total_run_count }}</td>
<td class="text-center">
{% if task.one_off %}
14 changes: 14 additions & 0 deletions web/startScan/templates/startScan/schedule_scan_ui.html
@@ -205,5 +205,19 @@ <h4 class="text-warning">Out of Scope Subdomains(Optional)</h4>
});
});

</script>
<script>
document.addEventListener('DOMContentLoaded', function() {
var form = document.getElementById('start-scan-form');
if (form) {
var timezoneOffsetField = document.createElement('input');
timezoneOffsetField.type = 'hidden';
timezoneOffsetField.name = 'timezone_offset';
timezoneOffsetField.value = new Date().getTimezoneOffset();
form.appendChild(timezoneOffsetField);
} else {
console.error("The form with the ID 'start-scan-form' was not found.");
}
});
</script>
{% endblock page_level_script %}
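
The hidden timezone_offset field gives the backend what it needs to turn a user-entered local clocked time into UTC before storing the schedule. The view that consumes it is not part of this diff, so the conversion below is only a sketch under that assumption (function and parameter names are illustrative):

    from datetime import timedelta

    def local_to_utc(naive_local_dt, timezone_offset_minutes):
        # JavaScript's Date.getTimezoneOffset() returns (UTC - local) in minutes,
        # e.g. -120 for UTC+2, so adding it to the local wall-clock time yields UTC.
        return naive_local_dt + timedelta(minutes=int(timezone_offset_minutes))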