From ebd95fc327bee9914bf36e3d32c96e94114bac20 Mon Sep 17 00:00:00 2001
From: psyray <psyray@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:51:56 +0200
Subject: [PATCH 1/3] bug(fetch_url): check if there's a list of urls

---
 web/reNgine/tasks.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/web/reNgine/tasks.py b/web/reNgine/tasks.py
index c1c32dd3..9c88640f 100644
--- a/web/reNgine/tasks.py
+++ b/web/reNgine/tasks.py
@@ -1795,6 +1795,11 @@ def fetch_url(self, urls=[], ctx={}, description=None):
 			ctx=ctx
 		)
 
+	# Exit early when there are no URLs to process
+	if not urls:
+		logger.warning("No URLs found. Exiting fetch_url.")
+		return
+
 	# Log initial URLs
 	logger.debug(f'Initial URLs: {urls}')
 

From b300bdcfdff1d504a6a1231a8b263963d97ee6ce Mon Sep 17 00:00:00 2001
From: psyray <psyray@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:07:03 +0200
Subject: [PATCH 2/3] bug(custom_header): rework the run command and correctly
 split parameters

---
 web/reNgine/common_func.py | 100 ++++++++++++++++-
 web/reNgine/tasks.py       | 216 +++++++++++++++----------------------
 2 files changed, 184 insertions(+), 132 deletions(-)

diff --git a/web/reNgine/common_func.py b/web/reNgine/common_func.py
index 1f8b7969..92bdf633 100644
--- a/web/reNgine/common_func.py
+++ b/web/reNgine/common_func.py
@@ -4,6 +4,8 @@
 import random
 import shutil
 import traceback
+import shlex
+import subprocess
 from time import sleep
 
 import humanize
@@ -1081,6 +1083,9 @@ def generate_header_param(custom_header, tool_name=None):
     Returns:
         str: Command-line parameter for the specified tool.
     """
+    logger.debug(f"Generating header parameters for tool: {tool_name}")
+    logger.debug(f"Input custom_header: {custom_header}")
+
     # Ensure the custom_header is a dictionary
     custom_header = parse_custom_header(custom_header)
 
@@ -1097,8 +1102,12 @@ def generate_header_param(custom_header, tool_name=None):
         'gospider': generate_gospider_params(custom_header),
     }
 
+    # Get the appropriate format based on the tool name
+    result = format_mapping.get(tool_name, format_mapping.get('common'))
+    logger.debug(f"Selected format for {tool_name}: {result}")
+
     # Return the corresponding parameter for the specified tool or default to common_headers format
-    return format_mapping.get(tool_name, format_mapping.get('common'))
+    return result
 
 def generate_gospider_params(custom_header):
     """
@@ -1139,3 +1148,92 @@ def extract_columns(row, columns):
         list: Extracted values from the specified columns.
     """
     return [row[i] for i in columns]
+
+def prepare_command(cmd, shell):
+    """
+    Convert a command string into the form expected by subprocess.Popen.
+
+    Args:
+        cmd (str): The command line to prepare.
+        shell (bool): If True, cmd is returned unchanged for the shell to parse.
+
+    Returns:
+        str or list: cmd itself when shell is True, otherwise the argument list produced by shlex.split (quoted segments stay together).
+    """
+    return cmd if shell else shlex.split(cmd)
+
+def create_command_object(cmd, scan_id, activity_id):
+    """
+    Insert a Command record for cmd, timestamped with the current time.
+
+    Args:
+        cmd (str): The command line, stored in the record's command field.
+        scan_id (int): Stored as scan_history_id on the record.
+        activity_id (int): Stored as activity_id on the record.
+
+    Returns:
+        Command: The object returned by Command.objects.create.
+    """
+    return Command.objects.create(
+        command=cmd,
+        time=timezone.now(),
+        scan_history_id=scan_id,
+        activity_id=activity_id
+    )
+
+def process_line(line, trunc_char=None):
+    """
+    Clean one line of command output and decode it as JSON when possible.
+
+    Args:
+        line (str): The raw output line.
+        trunc_char (str, optional): Suffix to strip from the cleaned line (any length). Defaults to None.
+
+    Returns:
+        object: The parsed JSON value if the cleaned line is valid JSON, otherwise the cleaned string.
+    """
+    line = line.strip()
+    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+    line = ansi_escape.sub('', line)
+    line = line.replace('\\x0d\\x0a', '\n')
+    if trunc_char and line.endswith(trunc_char):
+        line = line[:-len(trunc_char)]  # drop the whole suffix, not just one character
+    try:
+        return json.loads(line)
+    except json.JSONDecodeError:
+        return line
+
+def write_history(history_file, cmd, return_code, output):
+    """
+    Append one command execution record to a history file as UTF-8 text.
+
+    Args:
+        history_file (str): Path to the history file; created if it does not exist.
+        cmd (str): The executed command.
+        return_code (int): The return code of the command.
+        output (str): The output of the command.
+    """
+    # Append mode creates a missing file; explicit UTF-8 avoids locale-dependent encode errors.
+    with open(history_file, 'a', encoding='utf-8') as f:
+        f.write(f'\n{cmd}\n{return_code}\n{output}\n------------------\n')
+
+def execute_command(command, shell, cwd):
+    """
+    Start a command with subprocess.Popen and return without waiting for it.
+
+    Args:
+        command (str or list): The command to execute.
+        shell (bool): Passed to Popen as shell.
+        cwd (str): Passed to Popen as cwd, the working directory for the command.
+
+    Returns:
+        subprocess.Popen: The process handle; stdout is a text-mode pipe that also carries stderr.
+    """
+    return subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        universal_newlines=True,
+        shell=shell,
+        cwd=cwd
+    )
diff --git a/web/reNgine/tasks.py b/web/reNgine/tasks.py
index 9c88640f..ba8fef78 100644
--- a/web/reNgine/tasks.py
+++ b/web/reNgine/tasks.py
@@ -1784,9 +1784,11 @@ def fetch_url(self, urls=[], ctx={}, description=None):
 
 	# Initialize the URLs
 	if urls and is_iterable(urls):
+		logger.debug(f'URLs provided by user')
 		with open(input_path, 'w') as f:
 			f.write('\n'.join(urls))
 	else:
+		logger.debug(f'URLs gathered from database')
 		urls = get_http_urls(
 			is_alive=enable_http_crawl,
 			write_filepath=input_path,
@@ -4161,139 +4163,91 @@ def remove_duplicate_endpoints(
 
 
 @app.task(name='run_command', bind=False, queue='run_command_queue')
-def run_command(
-		cmd, 
-		cwd=None, 
-		shell=False, 
-		history_file=None, 
-		scan_id=None, 
-		activity_id=None,
-		remove_ansi_sequence=False
-	):
-	"""Run a given command using subprocess module.
-
-	Args:
-		cmd (str): Command to run.
-		cwd (str): Current working directory.
-		echo (bool): Log command.
-		shell (bool): Run within separate shell if True.
-		history_file (str): Write command + output to history file.
-		remove_ansi_sequence (bool): Used to remove ANSI escape sequences from output such as color coding
-	Returns:
-		tuple: Tuple with return_code, output.
-	"""
-	logger.info(cmd)
-	logger.warning(activity_id)
-
-	# Create a command record in the database
-	command_obj = Command.objects.create(
-		command=cmd,
-		time=timezone.now(),
-		scan_history_id=scan_id,
-		activity_id=activity_id)
-
-	# Run the command using subprocess
-	popen = subprocess.Popen(
-		cmd if shell else cmd.split(),
-		shell=shell,
-		stdout=subprocess.PIPE,
-		stderr=subprocess.STDOUT,
-		cwd=cwd,
-		universal_newlines=True)
-	output = ''
-	for stdout_line in iter(popen.stdout.readline, ""):
-		item = stdout_line.strip()
-		output += '\n' + item
-		logger.debug(item)
-	popen.stdout.close()
-	popen.wait()
-	return_code = popen.returncode
-	command_obj.output = output
-	command_obj.return_code = return_code
-	command_obj.save()
-	if history_file:
-		mode = 'a'
-		if not os.path.exists(history_file):
-			mode = 'w'
-		with open(history_file, mode) as f:
-			f.write(f'\n{cmd}\n{return_code}\n{output}\n------------------\n')
-	if remove_ansi_sequence:
-		output = remove_ansi_escape_sequences(output)
-	return return_code, output
-
-
-#-------------#
-# Other utils #
-#-------------#
+def run_command(cmd, cwd=None, shell=False, history_file=None, scan_id=None, activity_id=None, remove_ansi_sequence=False):
+    """
+    Run a command to completion, record it in the database and return its output.
+
+    Args:
+        cmd (str): The command to execute.
+        cwd (str, optional): The working directory for the command. Defaults to None.
+        shell (bool, optional): Whether to use shell execution. Defaults to False.
+        history_file (str, optional): File to write command history. Defaults to None.
+        scan_id (int, optional): ID of the associated scan. Defaults to None.
+        activity_id (int, optional): ID of the associated activity. Defaults to None.
+        remove_ansi_sequence (bool, optional): Strip ANSI escape sequences from the returned output only. Defaults to False.
+
+    Returns:
+        tuple: (return_code, output); output is every stripped line, each preceded by a newline.
+    """
+    logger.info(f"Executing command: {cmd}")
+    command_obj = create_command_object(cmd, scan_id, activity_id)
+    command = prepare_command(cmd, shell)
+    logger.debug(f"Prepared run command: {command}")
+
+    process = execute_command(command, shell, cwd)
+    output = ''
+    for stdout_line in iter(process.stdout.readline, ""):
+        item = stdout_line.strip()
+        output += '\n' + item
+        logger.debug(item)
+    
+    process.stdout.close()
+    process.wait()
+    return_code = process.returncode
+    command_obj.output = output
+    command_obj.return_code = return_code
+    command_obj.save()
+
+    if history_file:
+        write_history(history_file, cmd, return_code, output)
+    # ANSI stripping is applied last, so the database record and history file keep the raw output.
+    if remove_ansi_sequence:
+        output = remove_ansi_escape_sequences(output)
+    
+    return return_code, output
 
 def stream_command(cmd, cwd=None, shell=False, history_file=None, encoding='utf-8', scan_id=None, activity_id=None, trunc_char=None):
-	# Log cmd
-	logger.info(cmd)
-	# logger.warning(activity_id)
-
-	# Create a command record in the database
-	command_obj = Command.objects.create(
-		command=cmd,
-		time=timezone.now(),
-		scan_history_id=scan_id,
-		activity_id=activity_id)
-
-	# Sanitize the cmd
-	command = cmd if shell else cmd.split()
-
-	# Run the command using subprocess
-	process = subprocess.Popen(
-		command,
-		stdout=subprocess.PIPE,
-		stderr=subprocess.STDOUT,
-		universal_newlines=True,
-		shell=shell)
-
-	# Log the output in real-time to the database
-	output = ""
-
-	# Process the output
-	for line in iter(lambda: process.stdout.readline(), b''):
-		if not line:
-			break
-		line = line.strip()
-		ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
-		line = ansi_escape.sub('', line)
-		line = line.replace('\\x0d\\x0a', '\n')
-		if trunc_char and line.endswith(trunc_char):
-			line = line[:-1]
-		item = line
-
-		# Try to parse the line as JSON
-		try:
-			item = json.loads(line)
-		except json.JSONDecodeError:
-			pass
-
-		# Yield the line
-		#logger.debug(item)
-		yield item
-
-		# Add the log line to the output
-		output += line + "\n"
-
-		# Update the command record in the database
-		command_obj.output = output
-		command_obj.save()
-
-	# Retrieve the return code and output
-	process.wait()
-	return_code = process.returncode
-
-	# Update the return code and final output in the database
-	command_obj.return_code = return_code
-	command_obj.save()
-
-	# Append the command, return code and output to the history file
-	if history_file is not None:
-		with open(history_file, "a") as f:
-			f.write(f"{cmd}\n{return_code}\n{output}\n")
-
+    """
+    Execute a command and yield its output line by line while recording it in the database.
+
+    Args:
+        cmd (str): The command to execute.
+        cwd (str, optional): The working directory for the command. Defaults to None.
+        shell (bool, optional): Whether to use shell execution. Defaults to False.
+        history_file (str, optional): File to write command history. Defaults to None.
+        encoding (str, optional): Currently unused by this function. Defaults to 'utf-8'.
+        scan_id (int, optional): ID of the associated scan. Defaults to None.
+        activity_id (int, optional): ID of the associated activity. Defaults to None.
+        trunc_char (str, optional): Character to truncate lines. Defaults to None.
+
+    Yields:
+        object: Each output line as returned by process_line (parsed JSON value or cleaned string).
+    """
+    logger.info(f"Starting execution of command: {cmd}")
+    command_obj = create_command_object(cmd, scan_id, activity_id)
+    command = prepare_command(cmd, shell)
+    logger.debug(f"Prepared stream command: {command}")
+    
+    process = execute_command(command, shell, cwd)
+    output = ""
+
+    # The pipe is opened in text mode, so EOF is the empty string '' (a b'' sentinel
+    # would never match and the loop would depend on a manual break).
+    for line in iter(process.stdout.readline, ''):
+        item = process_line(line, trunc_char)
+        yield item
+        output += line
+        command_obj.output = output
+        command_obj.save()
+    process.stdout.close()
+    process.wait()
+    return_code = process.returncode
+    command_obj.return_code = return_code
+    command_obj.save()
+    logger.info(f'Command returned exit code: {return_code}')
+
+    if history_file:
+        write_history(history_file, cmd, return_code, output)
 
 def process_httpx_response(line):
 	"""TODO: implement this"""

From 511f499d2106f02a54f76e6995d3141e1ca06f97 Mon Sep 17 00:00:00 2001
From: psyray <psyray@users.noreply.github.com>
Date: Tue, 17 Sep 2024 02:21:49 +0200
Subject: [PATCH 3/3] feat: add input sanitization and utf-8 encoding support

- Introduced a clean_quotes function to sanitize input data by removing double quotes.
- Updated form handling in add_engine and update_engine views to use the clean_quotes function for input sanitization.
- Added UTF-8 encoding support to various file operations to ensure proper handling of text files.
- Enhanced parse_custom_header function to validate header values and raise errors for invalid formats.
---
 web/reNgine/common_func.py | 16 +++++++++++++--
 web/scanEngine/views.py    | 40 +++++++++++++++++++++++++-------------
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/web/reNgine/common_func.py b/web/reNgine/common_func.py
index 56d2481d..d8c114d5 100644
--- a/web/reNgine/common_func.py
+++ b/web/reNgine/common_func.py
@@ -1045,9 +1045,11 @@ def extract_between(text, pattern):
 		return match.group(1).strip()
 	return ""
 
+import re
+
 def parse_custom_header(custom_header):
     """
-    Parse the custom_header input to ensure it is a dictionary.
+    Parse the custom_header input to ensure it is a dictionary with valid header values.
 
     Args:
         custom_header (dict or str): Dictionary or string containing the custom headers.
@@ -1055,6 +1057,8 @@ def parse_custom_header(custom_header):
     Returns:
         dict: Parsed dictionary of custom headers.
     """
+    def is_valid_header_value(value):  # non-strings and CR/LF are rejected (no TypeError, no header/argument injection)
+        return isinstance(value, str) and bool(re.fullmatch(r'[\w\- \t.,;:@()/+*=\'\[\]{}]+', value))
 
     if isinstance(custom_header, str):
         header_dict = {}
@@ -1063,11 +1067,19 @@ def parse_custom_header(custom_header):
             parts = header.split(':', 1)
             if len(parts) == 2:
                 key, value = parts
-                header_dict[key.strip()] = value.strip()
+                key = key.strip()
+                value = value.strip()
+                if is_valid_header_value(value):
+                    header_dict[key] = value
+                else:
+                    raise ValueError(f"Invalid header value: '{value}'")
             else:
                 raise ValueError(f"Invalid header format: '{header}'")
         return header_dict
     elif isinstance(custom_header, dict):
+        for key, value in custom_header.items():
+            if not is_valid_header_value(value):
+                raise ValueError(f"Invalid header value: '{value}'")
         return custom_header
     else:
         raise ValueError("custom_header must be a dictionary or a string")
diff --git a/web/scanEngine/views.py b/web/scanEngine/views.py
index 664ecb0e..d98cf73c 100644
--- a/web/scanEngine/views.py
+++ b/web/scanEngine/views.py
@@ -3,7 +3,6 @@
 import os
 import re
 import shutil
-import subprocess
 
 from datetime import datetime
 from django import http
@@ -37,19 +36,30 @@ def index(request, slug):
     }
     return render(request, 'scanEngine/index.html', context)
 
+def clean_quotes(data):
+    """Recursively strip double quotes from strings; dict keys and other types are left unchanged."""
+    if isinstance(data, str):
+        return data.replace('"', '')
+    if isinstance(data, dict):
+        return {name: clean_quotes(item) for name, item in data.items()}
+    # Lists are rebuilt element by element; anything else is returned untouched.
+    return [clean_quotes(entry) for entry in data] if isinstance(data, list) else data
 
 @has_permission_decorator(PERM_MODIFY_SCAN_CONFIGURATIONS, redirect_url=FOUR_OH_FOUR_URL)
 def add_engine(request, slug):
     form = AddEngineForm()
     
     # load default yaml config
-    with open(RENGINE_HOME + '/config/default_yaml_config.yaml', 'r') as yaml_file:
+    with open(RENGINE_HOME + '/config/default_yaml_config.yaml', 'r', encoding='utf-8') as yaml_file:
         default_config = yaml_file.read()
     
     if request.method == "POST":
         form = AddEngineForm(request.POST)
         if form.is_valid():
-            form.save()
+            cleaned_data = {key: clean_quotes(value) for key, value in form.cleaned_data.items()}
+            for key, value in cleaned_data.items():
+                setattr(form.instance, key, value) 
+            form.instance.save()
             messages.add_message(
                 request,
                 messages.INFO,
@@ -96,7 +106,10 @@ def update_engine(request, slug, id):
     if request.method == "POST":
         form = UpdateEngineForm(request.POST, instance=engine)
         if form.is_valid():
-            form.save()
+            cleaned_data = {key: clean_quotes(value) for key, value in form.cleaned_data.items()}
+            for key, value in cleaned_data.items():
+                setattr(form.instance, key, value) 
+            form.instance.save()
             messages.add_message(
                 request,
                 messages.INFO,
@@ -130,7 +143,8 @@ def add_wordlist(request, slug):
                 wordlist_content = txt_file.read().decode('UTF-8', "ignore")
                 wordlist_file = open(
                     Path(RENGINE_WORDLISTS) / f"{form.cleaned_data['short_name']}.txt",
-                    'w'
+                    'w',
+                    encoding='utf-8',
                 )
                 wordlist_file.write(wordlist_content)
                 Wordlist.objects.create(
@@ -217,7 +231,7 @@ def tool_specific_settings(request, slug):
                 # remove special chars from filename, that could possibly do directory traversal or XSS
                 filename = re.sub(r'[\\/*?:"<>|]',"", gf_file.name)
                 file_path = Path.home() / '.gf/' / filename
-                with open(file_path, "w") as file:
+                with open(file_path, "w", encoding='utf-8') as file:
                     file.write(gf_file.read().decode("utf-8"))
                 messages.add_message(request, messages.INFO, f'Pattern {gf_file.name[:4]} successfully uploaded')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
@@ -230,43 +244,43 @@ def tool_specific_settings(request, slug):
             else:
                 filename = re.sub(r'[\\/*?:"<>|]',"", nuclei_file.name)
                 file_path = Path.home() / 'nuclei-templates/' / filename
-                with open(file_path, "w") as file:
+                with open(file_path, "w", encoding='utf-8') as file:
                     file.write(nuclei_file.read().decode("utf-8"))
                 messages.add_message(request, messages.INFO, f'Nuclei Pattern {nuclei_file.name[:-5]} successfully uploaded')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'nuclei_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / 'nuclei' / 'config.yaml', "w") as fhandle:
+            with open(Path.home() / '.config' / 'nuclei' / 'config.yaml', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('nuclei_config_text_area'))
             messages.add_message(request, messages.INFO, 'Nuclei config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'subfinder_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / 'subfinder' / 'config.yaml', "w") as fhandle:
+            with open(Path.home() / '.config' / 'subfinder' / 'config.yaml', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('subfinder_config_text_area'))
             messages.add_message(request, messages.INFO, 'Subfinder config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'naabu_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / 'naabu' / 'config.yaml', "w") as fhandle:
+            with open(Path.home() / '.config' / 'naabu' / 'config.yaml', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('naabu_config_text_area'))
             messages.add_message(request, messages.INFO, 'Naabu config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'amass_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / 'amass.ini', "w") as fhandle:
+            with open(Path.home() / '.config' / 'amass.ini', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('amass_config_text_area'))
             messages.add_message(request, messages.INFO, 'Amass config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'theharvester_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / 'theHarvester' / 'api-keys.yaml', "w") as fhandle:
+            with open(Path.home() / '.config' / 'theHarvester' / 'api-keys.yaml', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('theharvester_config_text_area'))
             messages.add_message(request, messages.INFO, 'theHarvester config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))
 
         elif 'gau_config_text_area' in request.POST:
-            with open(Path.home() / '.config' / '.gau.toml', "w") as fhandle:
+            with open(Path.home() / '.config' / '.gau.toml', "w", encoding='utf-8') as fhandle:
                 fhandle.write(request.POST.get('gau_config_text_area'))
             messages.add_message(request, messages.INFO, 'GAU config updated!')
             return http.HttpResponseRedirect(reverse('tool_settings', kwargs={'slug': slug}))