fix(scan): fix clocked and scheduled scan not working (#182)
* fix(scan): fix clocked and scheduled scan not working

* fix(scan): store start datetime of schedule scan in UTC

* fix(celery): add more DEBUG logging for celery beat (datetime of tasks ...)

* style(time): add UTC to the schedule time
psyray authored Sep 4, 2024
1 parent 7ff6d01 commit 4d35400
Showing 6 changed files with 222 additions and 161 deletions.
28 changes: 28 additions & 0 deletions web/reNgine/common_func.py
@@ -1139,3 +1139,31 @@ def extract_columns(row, columns):
        list: Extracted values from the specified columns.
    """
    return [row[i] for i in columns]

def create_scan_object(host_id, engine_id, initiated_by_id=None):
    '''
    create task with pending status so that celery task will execute when
    threads are free
    Args:
        host_id: int: id of Domain model
        engine_id: int: id of EngineType model
        initiated_by_id: int: id of User model (Optional)
    '''
    # get current time
    current_scan_time = timezone.now()
    # fetch engine and domain object
    engine = EngineType.objects.get(pk=engine_id)
    domain = Domain.objects.get(pk=host_id)
    scan = ScanHistory()
    scan.scan_status = INITIATED_TASK
    scan.domain = domain
    scan.scan_type = engine
    scan.start_scan_date = current_scan_time
    if initiated_by_id:
        user = User.objects.get(pk=initiated_by_id)
        scan.initiated_by = user
    scan.save()
    # save last scan date for domain model
    domain.start_scan_date = current_scan_time
    domain.save()
    return scan.id
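
The helper above is called from initiate_scan for scheduled runs (see the tasks.py diff below). A minimal usage sketch, assuming the Domain, EngineType and User records already exist and that ScanHistory lives in startScan.models as in upstream reNgine; the concrete variables (domain, engine, request) are illustrative only:

# Hedged usage sketch -- not part of this commit; mirrors the call site in tasks.py.
from reNgine.common_func import create_scan_object
from startScan.models import ScanHistory

scan_history_id = create_scan_object(
    host_id=domain.id,                 # Domain to scan (illustrative object)
    engine_id=engine.id,               # EngineType selected for the scan
    initiated_by_id=request.user.id,   # optional: user who scheduled the scan
)
scan = ScanHistory.objects.get(pk=scan_history_id)   # scan_status == INITIATED_TASK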
16 changes: 12 additions & 4 deletions web/reNgine/settings.py
@@ -171,10 +171,6 @@
USE_L10N = True
USE_TZ = True

# Temporary fix for celery beat crash
# See https://github.com/yogeshojha/rengine/issues/971
DJANGO_CELERY_BEAT_TZ_AWARE = False

MEDIA_URL = '/media/'
FILE_UPLOAD_MAX_MEMORY_SIZE = 100000000
FILE_UPLOAD_PERMISSIONS = 0o644
@@ -264,6 +260,13 @@
            'filename': 'celery.log',
            'maxBytes': 1024 * 1024 * 100,  # 100 mb
        },
        'celery_beat': {
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'simple',
            'filename': 'celery_beat.log',
            'maxBytes': 1024 * 1024 * 100,  # 100 mb
            'backupCount': 5,
        },
    },
    'formatters': {
        'default': {
@@ -328,6 +331,11 @@
            'handlers': ['null'],
            'propagate': False,
        },
        'django_celery_beat': {
            'handlers': ['celery_beat', 'console'],
            'level': 'DEBUG',
            'propagate': True,
        },
    },
    'root': {
        'handlers': ['console'],
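
With the celery_beat handler and the django_celery_beat logger above, any record emitted under the django_celery_beat namespace at DEBUG or higher is written to celery_beat.log (rotated at 100 MB, 5 backups) and echoed to the console. A small sketch showing how that routing can be exercised; the logger name comes from the config above, the message is illustrative:

import logging

# Children of 'django_celery_beat' (e.g. its schedulers module) inherit the
# DEBUG level and the celery_beat/console handlers configured above.
logger = logging.getLogger('django_celery_beat.schedulers')
logger.debug('next wakeup for clocked task computed at %s (UTC)', '2024-09-04 12:00:00')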
226 changes: 121 additions & 105 deletions web/reNgine/tasks.py
@@ -63,6 +63,7 @@ def initiate_scan(
results_dir=RENGINE_RESULTS,
imported_subdomains=[],
out_of_scope_subdomains=[],
initiated_by_id=None,
url_filter=''):
"""Initiate a new scan.
@@ -74,134 +75,149 @@
results_dir (str): Results directory.
imported_subdomains (list): Imported subdomains.
out_of_scope_subdomains (list): Out-of-scope subdomains.
url_filter (str): URL path. Default: ''
url_filter (str): URL path. Default: ''.
initiated_by_id (int): ID of the user initiating the scan.
"""

if CELERY_REMOTE_DEBUG:
debug()

# Get scan history
scan = ScanHistory.objects.get(pk=scan_history_id)
logger.info('Initiating scan on celery')
scan = None
try:
# Get scan engine
engine_id = engine_id or scan.scan_type.id # scan history engine_id
engine = EngineType.objects.get(pk=engine_id)

# Get scan engine
engine_id = engine_id or scan.scan_type.id # scan history engine_id
engine = EngineType.objects.get(pk=engine_id)
# Get YAML config
config = yaml.safe_load(engine.yaml_configuration)
enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
gf_patterns = config.get(GF_PATTERNS, [])

# Get YAML config
config = yaml.safe_load(engine.yaml_configuration)
enable_http_crawl = config.get(ENABLE_HTTP_CRAWL, DEFAULT_ENABLE_HTTP_CRAWL)
gf_patterns = config.get(GF_PATTERNS, [])
# Get domain and set last_scan_date
domain = Domain.objects.get(pk=domain_id)
domain.last_scan_date = timezone.now()
domain.save()

# Get domain and set last_scan_date
domain = Domain.objects.get(pk=domain_id)
domain.last_scan_date = timezone.now()
domain.save()
# Get path filter
url_filter = url_filter.rstrip('/')

# Get path filter
url_filter = url_filter.rstrip('/')
# for live scan scan history id is passed as scan_history_id
# and no need to create scan_history object

# Get or create ScanHistory() object
if scan_type == LIVE_SCAN: # immediate
if scan_type == SCHEDULED_SCAN: # scheduled
# we need to create scan_history object for each scheduled scan
scan_history_id = create_scan_object(
host_id=domain_id,
engine_id=engine_id,
initiated_by_id=initiated_by_id,
)
scan = ScanHistory.objects.get(pk=scan_history_id)
scan.scan_status = RUNNING_TASK
elif scan_type == SCHEDULED_SCAN: # scheduled
scan = ScanHistory()
scan.scan_status = INITIATED_TASK
scan.scan_type = engine
scan.celery_ids = [initiate_scan.request.id]
scan.domain = domain
scan.start_scan_date = timezone.now()
scan.tasks = engine.tasks
uuid_scan = uuid.uuid1()
scan.results_dir = f'{results_dir}/{domain.name}/scans/{uuid_scan}'
add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
if add_gf_patterns and is_iterable(gf_patterns):
scan.used_gf_patterns = ','.join(gf_patterns)
scan.save()

try:
scan.scan_type = engine
scan.celery_ids = [initiate_scan.request.id]
scan.domain = domain
scan.start_scan_date = timezone.now()
scan.tasks = engine.tasks
uuid_scan = uuid.uuid1()
scan.results_dir = f'{results_dir}/{domain.name}/scans/{uuid_scan}'
add_gf_patterns = gf_patterns and 'fetch_url' in engine.tasks
if add_gf_patterns and is_iterable(gf_patterns):
scan.used_gf_patterns = ','.join(gf_patterns)
scan.save()

# Create scan results dir
os.makedirs(scan.results_dir, exist_ok=True)
except:
import traceback

traceback.print_exc()
raise

# Build task context
ctx = {
'scan_history_id': scan_history_id,
'engine_id': engine_id,
'domain_id': domain.id,
'results_dir': scan.results_dir,
'url_filter': url_filter,
'yaml_configuration': config,
'out_of_scope_subdomains': out_of_scope_subdomains
}
ctx_str = json.dumps(ctx, indent=2)

# Send start notif
logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
send_scan_notif.delay(
scan_history_id,
subscan_id=None,
engine_id=engine_id,
status=CELERY_TASK_STATUS_MAP[scan.scan_status])
# Build task context
ctx = {
'scan_history_id': scan_history_id,
'engine_id': engine_id,
'domain_id': domain.id,
'results_dir': scan.results_dir,
'url_filter': url_filter,
'yaml_configuration': config,
'out_of_scope_subdomains': out_of_scope_subdomains
}
ctx_str = json.dumps(ctx, indent=2)

# Send start notif
logger.warning(f'Starting scan {scan_history_id} with context:\n{ctx_str}')
send_scan_notif.delay(
scan_history_id,
subscan_id=None,
engine_id=engine_id,
status=CELERY_TASK_STATUS_MAP[scan.scan_status])

# Save imported subdomains in DB
save_imported_subdomains(imported_subdomains, ctx=ctx)

# Create initial subdomain in DB: make a copy of domain as a subdomain so
# that other tasks using subdomains can use it.
subdomain_name = domain.name
subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)

# Save imported subdomains in DB
save_imported_subdomains(imported_subdomains, ctx=ctx)

# Create initial subdomain in DB: make a copy of domain as a subdomain so
# that other tasks using subdomains can use it.
subdomain_name = domain.name
subdomain, _ = save_subdomain(subdomain_name, ctx=ctx)
# If enable_http_crawl is set, create an initial root HTTP endpoint so that
# HTTP crawling can start somewhere
http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
endpoint, _ = save_endpoint(
http_url,
ctx=ctx,
crawl=enable_http_crawl,
is_default=True,
subdomain=subdomain
)

# If enable_http_crawl is set, create an initial root HTTP endpoint so that
# HTTP crawling can start somewhere
http_url = f'{domain.name}{url_filter}' if url_filter else domain.name
endpoint, _ = save_endpoint(
http_url,
ctx=ctx,
crawl=enable_http_crawl,
is_default=True,
subdomain=subdomain
)
save_subdomain_metadata(subdomain, endpoint)

# Build Celery tasks, crafted according to the dependency graph below:
# subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
# osint vulnerability_scan
# osint dalfox xss scan
# screenshot
# waf_detection
workflow = chain(
group(
subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
osint.si(ctx=ctx, description='OS Intelligence')
),
port_scan.si(ctx=ctx, description='Port scan'),
fetch_url.si(ctx=ctx, description='Fetch URL'),
group(
dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
screenshot.si(ctx=ctx, description='Screenshot'),
waf_detection.si(ctx=ctx, description='WAF detection')
save_subdomain_metadata(subdomain, endpoint)


# Build Celery tasks, crafted according to the dependency graph below:
# subdomain_discovery --> port_scan --> fetch_url --> dir_file_fuzz
# osint vulnerability_scan
# osint dalfox xss scan
# screenshot
# waf_detection
workflow = chain(
group(
subdomain_discovery.si(ctx=ctx, description='Subdomain discovery'),
osint.si(ctx=ctx, description='OS Intelligence')
),
port_scan.si(ctx=ctx, description='Port scan'),
fetch_url.si(ctx=ctx, description='Fetch URL'),
group(
dir_file_fuzz.si(ctx=ctx, description='Directories & files fuzz'),
vulnerability_scan.si(ctx=ctx, description='Vulnerability scan'),
screenshot.si(ctx=ctx, description='Screenshot'),
waf_detection.si(ctx=ctx, description='WAF detection')
)
)
)

# Build callback
callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])
# Build callback
callback = report.si(ctx=ctx).set(link_error=[report.si(ctx=ctx)])

# Run Celery chord
logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
task = chain(workflow, callback).on_error(callback).delay()
scan.celery_ids.append(task.id)
scan.save()
# Run Celery chord
logger.info(f'Running Celery workflow with {len(workflow.tasks) + 1} tasks')
task = chain(workflow, callback).on_error(callback).delay()
scan.celery_ids.append(task.id)
scan.save()

return {
'success': True,
'task_id': task.id
}
return {
'success': True,
'task_id': task.id
}

except Exception as e:
logger.exception(e)
if scan:
scan.scan_status = FAILED_TASK
scan.error_message = str(e)
scan.save()
return {
'success': False,
'error': str(e)
}

@app.task(name='initiate_subscan', bind=False, queue='subscan_queue')
def initiate_subscan(
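
For the scheduled path, initiate_scan is now expected to be invoked by celery beat with scan_type=SCHEDULED_SCAN and no pre-created ScanHistory (the function creates one via create_scan_object). The UI/view code that registers the periodic task is in the sixth changed file, which is not shown in this excerpt, so the following is only a sketch of how a one-off clocked task could be registered with django-celery-beat; the Celery task name, the kwargs keys and the surrounding variables are assumptions inferred from the signature above:

import json
from datetime import timedelta

from django.utils import timezone
from django_celery_beat.models import ClockedSchedule, PeriodicTask

# Store the clocked time in UTC, as the commit message requires.
run_at_utc = timezone.now() + timedelta(hours=2)        # illustrative

clocked = ClockedSchedule.objects.create(clocked_time=run_at_utc)
PeriodicTask.objects.create(
    clocked=clocked,
    one_off=True,                                       # run exactly once at clocked_time
    name=f'Scheduled scan for {domain.name}',           # must be unique; illustrative
    task='initiate_scan',                               # assumed registered task name
    kwargs=json.dumps({
        'scan_history_id': 0,                           # unused for SCHEDULED_SCAN
        'domain_id': domain.id,
        'engine_id': engine.id,
        'scan_type': SCHEDULED_SCAN,                    # constant from reNgine (assumed import)
        'initiated_by_id': request.user.id,
    }),
)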
2 changes: 1 addition & 1 deletion web/startScan/templates/startScan/schedule_scan_list.html
@@ -48,7 +48,7 @@
Will run exactly at {{ task.clocked.clocked_time}} UTC
{% endif %}
</td>
<td>{{ task.last_run_at|none_or_never }}</td>
<td>{{ task.last_run_at|none_or_never }} UTC</td>
<td class="text-center">{{ task.total_run_count }}</td>
<td class="text-center">
{% if task.one_off %}
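
The last_run_at cell relies on a none_or_never template filter whose implementation is not part of this commit; a minimal sketch of what such a Django filter typically looks like, assuming it is registered in one of the app's templatetags modules:

from django import template

register = template.Library()

@register.filter
def none_or_never(value):
    # Show a friendly placeholder when the scheduled task has never run.
    return value if value is not None else 'Never'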
14 changes: 14 additions & 0 deletions web/startScan/templates/startScan/schedule_scan_ui.html
@@ -205,5 +205,19 @@ <h4 class="text-warning">Out of Scope Subdomains(Optional)</h4>
});
});

</script>
<script>
	document.addEventListener('DOMContentLoaded', function() {
		var form = document.getElementById('start-scan-form');
		if (form) {
			var timezoneOffsetField = document.createElement('input');
			timezoneOffsetField.type = 'hidden';
			timezoneOffsetField.name = 'timezone_offset';
			timezoneOffsetField.value = new Date().getTimezoneOffset();
			form.appendChild(timezoneOffsetField);
		} else {
			console.error("The form with the ID 'start-scan-form' was not found.");
		}
	});
</script>
{% endblock page_level_script %}
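
JavaScript's getTimezoneOffset() returns the difference between UTC and local time in minutes (positive when local time is behind UTC), so adding it to a naive local datetime yields UTC. The view that consumes the hidden timezone_offset field belongs to the sixth changed file, which is not shown here; the snippet below is only a sketch of the conversion that view would have to perform, with the datetime format and field names assumed:

from datetime import datetime, timedelta, timezone as dt_timezone

def local_schedule_to_utc(local_dt_str, offset_minutes):
    """Convert the naive local datetime posted by the browser to an aware UTC datetime.

    offset_minutes is the raw value of JavaScript's getTimezoneOffset(),
    i.e. UTC = local time + offset.
    """
    local_dt = datetime.strptime(local_dt_str, '%Y-%m-%d %H:%M')   # posted format is an assumption
    return (local_dt + timedelta(minutes=int(offset_minutes))).replace(tzinfo=dt_timezone.utc)

# Example: a browser in UTC+2 posts offset_minutes='-120', so
# local_schedule_to_utc('2024-09-04 14:00', '-120') -> 2024-09-04 12:00 UTC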