From 783f6b1c42295822d60f94b6dbee4932c1dc12f5 Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Sat, 17 Feb 2024 17:37:56 +0000 Subject: [PATCH 1/6] Created a new method for output optimisation Implemented a function in esbmc_util that uses regex patterns to remove some output lines to reduce token consmuption and to facilitate the reduction of the likelihood that LLMs like GPT 3.5 turbo reach the token limit for some codebases that need comprehensive explanations. Addded in config.json four new cases that address common bugs in C programs: buffer overflow, arithmetic overflow, array out-of-bounds and memory leaks. Signed-off-by: mihai.state --- .env.example | 3 --- config.json | 52 +++++++++++++++++++++++++++++++++++++++++- esbmc_ai/__main__.py | 17 ++++++++++++++ esbmc_ai/esbmc_util.py | 19 +++++++++++++++ 4 files changed, 87 insertions(+), 4 deletions(-) delete mode 100644 .env.example diff --git a/.env.example b/.env.example deleted file mode 100644 index 7a86880..0000000 --- a/.env.example +++ /dev/null @@ -1,3 +0,0 @@ -OPENAI_API_KEY=XXXXXXXXXXX -HUGGINGFACE_API_KEY=YYYYYYYYYY -ESBMC_AI_CFG_PATH=./config.json \ No newline at end of file diff --git a/config.json b/config.json index e3bdfd6..e233f3c 100644 --- a/config.json +++ b/config.json @@ -1,7 +1,7 @@ { "ai_model": "gpt-3.5-turbo-16k", "ai_custom": {}, - "esbmc_path": "~/.local/bin/esbmc", + "esbmc_path": "~/ESBMC/bin/esbmc", "allow_successful": true, "esbmc_params": [ "--interval-analysis", @@ -63,6 +63,7 @@ } ] }, + "generate_solution": { "temperature": 1.3, "scenarios": { @@ -77,6 +78,55 @@ "content": "OK." } ] + }, + "array out-of-bounds": { + "system" : [ + { + "role":"System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified an array out-of-bounds issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where array out-of-bounds might occur. The solution should prevent undefined behavior,potential crashes, security vulnerabilities or data integrity compromises due to out-of-bounds array. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behavior and handles out-of-bounds arrays effectively.\nGuidelines: Implement assertions for the array indices. Output: Provide the corrected, complete C code. The solution should compile and run error-free, addressing the out-of-bounds array vulnerability.\nStart the code snippet with ```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + } + , + "arithmetic overflow":{ + "system":[ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified an array out-of-bounds issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where an arithmetic overflow might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the arithmetic overflow. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles arithmetic overflows effectively.\nGuidelines: Implement precondition checks or assertions. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the arithmetic overflow vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + }, + "buffer overflow": { + "system":[ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a buffer overflow issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a buffer overflow might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the buffer overflow. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles buffer overflows effectively.\nGuidelines: Implement precondition checks or assertions. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the buffer overflow vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + }, + "memory leak": { + "system":[ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a memory leak issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a memory leak might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the memory leak. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles memory leaks effectively.\nGuidelines: Implement memory frees where appropiate, check memory allocation return values, release resources in cleanup or exit functions, use automatic variables. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the memory leak vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] } }, "system": [ diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index 71952b0..a9f7427 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -175,6 +175,23 @@ def parse_command(user_prompt_string: str) -> tuple[str, list[str]]: command_args: list[str] = parsed_array[1:] return command, command_args +def esbmc_output_optimisation(esbmc_output:str) -> str: + + esbmc_output =re.sub(r'^\d+. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^\d+. This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes + esbmc_output = re.sub(r'\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b', '', esbmc_output) # Remove hex patterns + esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) # Remove Boolector lines + esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output, flags=re.MULTILINE) # Remove solver time lines + esbmc_output = re.sub(r'.*Boolector.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'/\*.*?\*/', '', esbmc_output, flags=re.MULTILINE) + pattern = r"- `.*Slicing time: \d+\.\d+s \(removed \d+ assignments\)`.*: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis." + esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\n\*s\n', '\n', esbmc_output) + esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") + + return esbmc_output def main() -> None: init_commands_list() diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index ddd66aa..2304ca5 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -22,8 +22,27 @@ def esbmc(path: str, esbmc_params: list): exit_code = process.wait() output: str = str(output_bytes).replace("\\n", "\n") err: str = str(err_bytes).replace("\\n", "\n") + output = esbmc_output_optimisation(output) return exit_code, output, err +def esbmc_output_optimisation(esbmc_output:str) -> str: + + esbmc_output =re.sub(r'^\d+. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^\d+. This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes + esbmc_output = re.sub(r'\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b', '', esbmc_output) # Remove hex patterns + esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) # Remove Boolector lines + esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output, flags=re.MULTILINE) # Remove solver time lines + esbmc_output = re.sub(r'.*Boolector.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'/\*.*?\*/', '', esbmc_output, flags=re.MULTILINE) + pattern = r"- `.*Slicing time: \d+\.\d+s \(removed \d+ assignments\)`.*: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis." + esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\n\*s\n', '\n', esbmc_output) + esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") + + return esbmc_output + def esbmc_load_source_code( file_path: str, From d8f670465669cebf99ffe5b3e37006949934d696 Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Sun, 18 Feb 2024 21:30:16 +0000 Subject: [PATCH 2/6] Fix minor changes --- esbmc_ai/__main__.py | 4 +++- esbmc_ai/esbmc_util.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index a9f7427..9cc6045 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -193,6 +193,7 @@ def esbmc_output_optimisation(esbmc_output:str) -> str: return esbmc_output + esbmc_output = esbmc_output_optimisation(esbmc_output) def main() -> None: init_commands_list() @@ -330,6 +331,7 @@ def main() -> None: api_keys=config.api_keys, temperature=config.chat_prompt_user_mode.temperature, ) + esbmc_output = esbmc_output_optimisation(esbmc_output) printv("Creating user chat") global chat @@ -385,7 +387,7 @@ def main() -> None: source_code=get_main_source_file().content, esbmc_output=esbmc_output, ) - + if not error: print( "\n\nESBMC-AI: Here is the corrected code, verified with ESBMC:" diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 2304ca5..23bcf72 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -1,6 +1,7 @@ # Author: Yiannis Charalambous import os +import re from subprocess import Popen, PIPE, STDOUT from pathlib import Path From 1dc5c0b619afccfb370d921da147b593e53365f1 Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Tue, 5 Mar 2024 15:48:41 +0000 Subject: [PATCH 3/6] Improved the ESBMC-AI output reduction Created more regex patterns that remove some lines that can be ommitted and provide shorter outputs. Added a few more indications in the config.json file that tell the LLM to avoid mentioning time measurements Signed-off-by: mihai.state --- config.json | 10 +- esbmc-ai | 2 +- esbmc_ai/__main__.py | 27 +- esbmc_ai/esbmc_util.py | 107 +++++++- myenv/bin/Activate.ps1 | 247 ++++++++++++++++++ myenv/bin/activate | 69 +++++ myenv/bin/activate.csh | 26 ++ myenv/bin/activate.fish | 66 +++++ myenv/bin/distro | 8 + myenv/bin/dotenv | 8 + myenv/bin/f2py | 8 + myenv/bin/httpx | 8 + myenv/bin/jsondiff | 41 +++ myenv/bin/jsonpatch | 107 ++++++++ myenv/bin/jsonpointer | 69 +++++ myenv/bin/langchain-server | 8 + myenv/bin/langsmith | 8 + myenv/bin/nltk | 8 + myenv/bin/normalizer | 8 + myenv/bin/openai | 8 + myenv/bin/pip | 8 + myenv/bin/pip3 | 8 + myenv/bin/pip3.10 | 8 + myenv/bin/pip3.11 | 8 + myenv/bin/python | 1 + myenv/bin/python3 | 1 + myenv/bin/python3.11 | 1 + myenv/bin/tqdm | 8 + myenv/bin/wheel | 8 + .../site/python3.11/greenlet/greenlet.h | 164 ++++++++++++ myenv/lib64 | 1 + myenv/pyvenv.cfg | 5 + path/to/your/virtualenv/bin/Activate.ps1 | 247 ++++++++++++++++++ path/to/your/virtualenv/bin/activate | 69 +++++ path/to/your/virtualenv/bin/activate.csh | 26 ++ path/to/your/virtualenv/bin/activate.fish | 66 +++++ path/to/your/virtualenv/bin/pip | 8 + path/to/your/virtualenv/bin/pip3 | 8 + path/to/your/virtualenv/bin/pip3.10 | 8 + path/to/your/virtualenv/bin/pip3.11 | 8 + path/to/your/virtualenv/bin/python | 1 + path/to/your/virtualenv/bin/python3 | 1 + path/to/your/virtualenv/bin/python3.11 | 1 + path/to/your/virtualenv/lib64 | 1 + path/to/your/virtualenv/pyvenv.cfg | 5 + 45 files changed, 1484 insertions(+), 29 deletions(-) create mode 100644 myenv/bin/Activate.ps1 create mode 100644 myenv/bin/activate create mode 100644 myenv/bin/activate.csh create mode 100644 myenv/bin/activate.fish create mode 100755 myenv/bin/distro create mode 100755 myenv/bin/dotenv create mode 100755 myenv/bin/f2py create mode 100755 myenv/bin/httpx create mode 100755 myenv/bin/jsondiff create mode 100755 myenv/bin/jsonpatch create mode 100755 myenv/bin/jsonpointer create mode 100755 myenv/bin/langchain-server create mode 100755 myenv/bin/langsmith create mode 100755 myenv/bin/nltk create mode 100755 myenv/bin/normalizer create mode 100755 myenv/bin/openai create mode 100755 myenv/bin/pip create mode 100755 myenv/bin/pip3 create mode 100755 myenv/bin/pip3.10 create mode 100755 myenv/bin/pip3.11 create mode 120000 myenv/bin/python create mode 120000 myenv/bin/python3 create mode 120000 myenv/bin/python3.11 create mode 100755 myenv/bin/tqdm create mode 100755 myenv/bin/wheel create mode 100644 myenv/include/site/python3.11/greenlet/greenlet.h create mode 120000 myenv/lib64 create mode 100644 myenv/pyvenv.cfg create mode 100644 path/to/your/virtualenv/bin/Activate.ps1 create mode 100644 path/to/your/virtualenv/bin/activate create mode 100644 path/to/your/virtualenv/bin/activate.csh create mode 100644 path/to/your/virtualenv/bin/activate.fish create mode 100755 path/to/your/virtualenv/bin/pip create mode 100755 path/to/your/virtualenv/bin/pip3 create mode 100755 path/to/your/virtualenv/bin/pip3.10 create mode 100755 path/to/your/virtualenv/bin/pip3.11 create mode 120000 path/to/your/virtualenv/bin/python create mode 120000 path/to/your/virtualenv/bin/python3 create mode 120000 path/to/your/virtualenv/bin/python3.11 create mode 120000 path/to/your/virtualenv/lib64 create mode 100644 path/to/your/virtualenv/pyvenv.cfg diff --git a/config.json b/config.json index e233f3c..e855782 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "ai_model": "gpt-3.5-turbo-16k", + "ai_model": "gpt-4-turbo-preview", "ai_custom": {}, "esbmc_path": "~/ESBMC/bin/esbmc", "allow_successful": true, @@ -28,12 +28,13 @@ "system": [ { "role": "System", - "content": "You are a security focused assistant that parses output from a program called ESBMC and explains the output to the user. ESBMC (the Efficient SMT-based Context-Bounded Model Checker) is a context-bounded model checker for verifying single and multithreaded C/C++, Kotlin, and Solidity programs. It can automatically verify both predefined safety properties (e.g., bounds check, pointer safety, overflow) and user-defined program assertions. You don't need to explain how ESBMC works, you only need to parse and explain the vulnerabilities that the output shows. For each line of code explained, say what the line number is as well. Do not answer any questions outside of these explicit parameters. If you understand, reply OK." + "content": "You are a security focused assistant that parses output from a program called ESBMC and explains the output to the user. ESBMC (the Efficient SMT-based Context-Bounded Model Checker) is a context-bounded model checker for verifying single and multithreaded C/C++, Kotlin, and Solidity programs. It can automatically verify both predefined safety properties (e.g., bounds check, pointer safety, overflow) and user-defined program assertions. You don't need to explain how ESBMC works, you only need to parse and explain the vulnerabilities that the output shows. For each line of code explained, say what the line number is as well. Do not answer any questions outside of these explicit parameters. If the initial output exceeds the token limit of the respective LLM model, remove the sequences (sentences or parts of sentences) that are related to any kind of assertions, assumptions, time measurements, that are in seconds, to the GOTO program, to the Boolector solver, bitvector, floating-point arithmetic, remaining VCCs and try to use less words to explain something if possible, create a larger sentence instead of two ones that have a direct corelation if result is not a very large one. Do not put time measurements (that are in seconds) in the output(e.g ., Symex completed in: 0.018s), like encoding time, runtime, slicing time, runtime decision procedure, do not use explanations for slicing. Avoid even mentioning any time measurements. If you understand, reply OK." }, { "role": "AI", "content": "OK" }, + { "role": "System", "content": "Reply OK if you understand that the following text is the program source code:\n\n```c{source_code}```" @@ -128,6 +129,9 @@ } ] } + }, + "null pointer":{ + }, "system": [ { @@ -169,7 +173,7 @@ "content": "OK" } ], - "initial": "Optimize the function \"%s\". Reoply with the entire source file back.", + "initial": "Optimize the function \"%s\". Reply with the entire source file back.", "esbmc_params": [ "--incremental-bmc", "--no-bounds-check", diff --git a/esbmc-ai b/esbmc-ai index 937d073..c8001e1 100755 --- a/esbmc-ai +++ b/esbmc-ai @@ -1,3 +1,3 @@ #!/usr/bin/env sh -python -m esbmc_ai $@ +python3 -m esbmc_ai $@ diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index 9cc6045..ce3165b 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -8,11 +8,11 @@ from time import sleep # Enables arrow key functionality for input(). Do not remove import. -import readline +#import readline import argparse from langchain.base_language import BaseLanguageModel - +import langchain import esbmc_ai.config as config from esbmc_ai import __author__, __version__ @@ -175,25 +175,7 @@ def parse_command(user_prompt_string: str) -> tuple[str, list[str]]: command_args: list[str] = parsed_array[1:] return command, command_args -def esbmc_output_optimisation(esbmc_output:str) -> str: - - esbmc_output =re.sub(r'^\d+. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'^\d+. This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes - esbmc_output = re.sub(r'\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b', '', esbmc_output) # Remove hex patterns - esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) # Remove Boolector lines - esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output, flags=re.MULTILINE) # Remove solver time lines - esbmc_output = re.sub(r'.*Boolector.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output =re.sub(r'/\*.*?\*/', '', esbmc_output, flags=re.MULTILINE) - pattern = r"- `.*Slicing time: \d+\.\d+s \(removed \d+ assignments\)`.*: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis." - esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\n\*s\n', '\n', esbmc_output) - esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") - - return esbmc_output - - esbmc_output = esbmc_output_optimisation(esbmc_output) + def main() -> None: init_commands_list() @@ -331,8 +313,7 @@ def main() -> None: api_keys=config.api_keys, temperature=config.chat_prompt_user_mode.temperature, ) - esbmc_output = esbmc_output_optimisation(esbmc_output) - + printv("Creating user chat") global chat chat = UserChat( diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 23bcf72..1fda085 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -2,11 +2,18 @@ import os import re +import sys from subprocess import Popen, PIPE, STDOUT from pathlib import Path - +import esbmc_ai.config as config +from esbmc_ai.ai_models import _ai_model_names from . import config +import nltk +from nltk.tokenize import word_tokenize,sent_tokenize +def optimize_output(output): + print("Optimiziiiiiingg aaaaaa") + return output def esbmc(path: str, esbmc_params: list): """Exit code will be 0 if verification successful, 1 if verification @@ -23,11 +30,15 @@ def esbmc(path: str, esbmc_params: list): exit_code = process.wait() output: str = str(output_bytes).replace("\\n", "\n") err: str = str(err_bytes).replace("\\n", "\n") + #output = optimize_output(output) + # print("HELLO before the call") output = esbmc_output_optimisation(output) + # print("HELLO after the code") + #sys.stdout.flush() return exit_code, output, err def esbmc_output_optimisation(esbmc_output:str) -> str: - + esbmc_output =re.sub(r'^\d+. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'^\d+. This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes @@ -41,9 +52,101 @@ def esbmc_output_optimisation(esbmc_output:str) -> str: esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'\n\*s\n', '\n', esbmc_output) esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") + #esbmc_output = re.sub(r'.*runtime decision.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'The time.*', '', esbmc_output, flags=re.MULTILINE) + #VCCs represent the assertions in the code that need to be verified + esbmc_output = re.sub(r'VCCs represent.*', '', esbmc_output, flags=re.MULTILINE) + #if(config.ai_model.name != "gpt-4-32K"): + #ESBMC then encodes the remaining VCCs using bitvector and floating-point arithmetic, and displays the time taken for this encoding process. + esbmc_output = re.sub(r'.*remaining VCCs.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*bitvector.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*floating-point arithmetic.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*encoding.*', '', esbmc_output, flags = re.MULTILINE) + esbmc_output = re.sub(r'.*GOTO program.*', '', esbmc_output, flags=re.MULTILINE) + #Next, ESBMC states that it is starting the runtime decision procedure. This is the step where ESBMC makes concrete assignments to the program variables to check the property being verified. + #It creates VCCs (verification conditions) and starts encoding them to a solver to solve the constraints. + #ESBMC encodes these VCCs to a solver, and in this case, the encoding time is shown as 0.019s. + #ESBMC encodes these VCCs to a solver, and in this case, the encoding time is shown as 0.019s. ESBMC then uses a runtime decision procedure to solve the encoded formulas, which takes 1.351s for this analysis. The decision procedure involves executing various branches of the program symbolically and making decisions based on constraints and conditions. + esbmc_output = re.sub(r'.*encoding time.*', '', esbmc_output, flags= re.MULTILINE) + esbmc_output = re.sub(r', which takes \d+\.\d+s \.*', '.', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'The output indicates that ESBMC generates a verification condition \(VCC\) for each possible path and encodes them into a solver\.', '', esbmc_output, flags= re.MULTILINE) + # esbmc_output = re.sub(r'In this case, the decision procedure took \d+\.\d+s \. seconds.','', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'Measures the [^.]+?encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'In this case, the procedure took [0-9.]+ seconds\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\s*It measures[^.]+? encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*\n', '.\n', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*? GOTO Program[^.]*\.\s*', '', esbmc_output) + esbmc_output = re.sub(r'.*? GOTO program[^.]*\.\s*', '', esbmc_output) + esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*time.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*program time: \d+\.\d+s \.', '',esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'"Runtime decision procedure: \d+\. \d+s" \.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time: \d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) + esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time: \d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) + esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*Symex.*|.*symex.*|.*symbolic execution.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'After[^.]*?, the code', 'The program then', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*assignments.*|.*assignment.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*iteratively.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*branches.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*analysis process.*\s','', esbmc_output, flags=re.MULTILINE ) + esbmc_output = re.sub(r'.*remaining VCC.*\s|.*remaining VCCs.*\s','', esbmc_output) + esbmc_output = re.sub(r'.*0 VCCs.*', '', esbmc_output, flags= re.MULTILINE) # if there are no more VCCs left, the user is not going to take action in this regard. + esbmc_output = re.sub(r'Finally, ESBMC confirms that the verfication[^.]"VERIFICATION SUCCESSFUL."', 'Finally, ESBMC indicates a successful verification.', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*.*"Symex completed in:.*".*\s', '', esbmc_output, flags=re.MULTILINE) + + ##assigments verification + + return esbmc_output + +def reduce_output2(esbmc_output: str) -> str: + + esbmc_output = re.sub(r'\.\s*[^.]*symbolically execute the program[^.]*\.', '', esbmc_output, flags=re.MULTILINE) return esbmc_output +# def remove_runtime_decision_procedure(text): +# # Tokenize the text into sentences +# sentences = sent_tokenize(text) + +# # Define the pattern to be removed +# pattern = "Runtime decision procedure:" +# pattern2 = "BMC program time:" + +# # Iterate over each sentence +# for i in range(len(sentences)): +# # Tokenize the sentence into words +# words = word_tokenize(sentences[i]) + +# # If the pattern is in the sentence, remove it +# if pattern in words: +# # Find the index of the pattern +# index = words.index(pattern) + +# # Remove the pattern and the next two words (the time and 's') +# if len(words) > index + 2 and words[index + 2] == 's': +# del words[index:index+3] + +# # Join the words back into a sentence +# sentences[i] = ' '.join(words) +# if pattern2 in words: +# index = words.index(pattern2) + +# if len(words) > index+2 and words[index+2] == 's': +# del words[index:index+3] +# sentences[i] = ' '.join(words) + +# # Join the sentences back into a text +# modified_text = ' '.join(sentences) + +# return modified_text + +# text = "This is a test sentence. Runtime decision procedure: 1.387s I want this part to remain." +# print(remove_runtime_decision_procedure(text)) def esbmc_load_source_code( file_path: str, diff --git a/myenv/bin/Activate.ps1 b/myenv/bin/Activate.ps1 new file mode 100644 index 0000000..b49d77b --- /dev/null +++ b/myenv/bin/Activate.ps1 @@ -0,0 +1,247 @@ +<# +.Synopsis +Activate a Python virtual environment for the current PowerShell session. + +.Description +Pushes the python executable for a virtual environment to the front of the +$Env:PATH environment variable and sets the prompt to signify that you are +in a Python virtual environment. Makes use of the command line switches as +well as the `pyvenv.cfg` file values present in the virtual environment. + +.Parameter VenvDir +Path to the directory that contains the virtual environment to activate. The +default value for this is the parent of the directory that the Activate.ps1 +script is located within. + +.Parameter Prompt +The prompt prefix to display when this virtual environment is activated. By +default, this prompt is the name of the virtual environment folder (VenvDir) +surrounded by parentheses and followed by a single space (ie. '(.venv) '). + +.Example +Activate.ps1 +Activates the Python virtual environment that contains the Activate.ps1 script. + +.Example +Activate.ps1 -Verbose +Activates the Python virtual environment that contains the Activate.ps1 script, +and shows extra information about the activation as it executes. + +.Example +Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv +Activates the Python virtual environment located in the specified location. + +.Example +Activate.ps1 -Prompt "MyPython" +Activates the Python virtual environment that contains the Activate.ps1 script, +and prefixes the current prompt with the specified string (surrounded in +parentheses) while the virtual environment is active. + +.Notes +On Windows, it may be required to enable this Activate.ps1 script by setting the +execution policy for the user. You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. + +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. + if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. +$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" diff --git a/myenv/bin/activate b/myenv/bin/activate new file mode 100644 index 0000000..ce35756 --- /dev/null +++ b/myenv/bin/activate @@ -0,0 +1,69 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1="(myenv) ${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT="(myenv) " + export VIRTUAL_ENV_PROMPT +fi + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null +fi diff --git a/myenv/bin/activate.csh b/myenv/bin/activate.csh new file mode 100644 index 0000000..ec2cc3a --- /dev/null +++ b/myenv/bin/activate.csh @@ -0,0 +1,26 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . +# Ported to Python 3.3 venv by Andrew Svetlov + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv" + +set _OLD_VIRTUAL_PATH="$PATH" +setenv PATH "$VIRTUAL_ENV/bin:$PATH" + + +set _OLD_VIRTUAL_PROMPT="$prompt" + +if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then + set prompt = "(myenv) $prompt" + setenv VIRTUAL_ENV_PROMPT "(myenv) " +endif + +alias pydoc python -m pydoc + +rehash diff --git a/myenv/bin/activate.fish b/myenv/bin/activate.fish new file mode 100644 index 0000000..6f03e7a --- /dev/null +++ b/myenv/bin/activate.fish @@ -0,0 +1,66 @@ +# This file must be used with "source /bin/activate.fish" *from fish* +# (https://fishshell.com/); you cannot run it directly. + +function deactivate -d "Exit virtual environment and return to normal shell environment" + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + functions -e fish_prompt + set -e _OLD_FISH_PROMPT_OVERRIDE + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + end + + set -e VIRTUAL_ENV + set -e VIRTUAL_ENV_PROMPT + if test "$argv[1]" != "nondestructive" + # Self-destruct! + functions -e deactivate + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv" + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/bin" $PATH + +# Unset PYTHONHOME if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # fish uses a function instead of an env var to generate the prompt. + + # Save the current fish_prompt function as the function _old_fish_prompt. + functions -c fish_prompt _old_fish_prompt + + # With the original prompt function renamed, we can override with our own. + function fish_prompt + # Save the return status of the last command. + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. + printf "%s%s%s" (set_color 4B8BBE) "(myenv) " (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . + # Output the original/"old" prompt. + _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" + set -gx VIRTUAL_ENV_PROMPT "(myenv) " +end diff --git a/myenv/bin/distro b/myenv/bin/distro new file mode 100755 index 0000000..3211489 --- /dev/null +++ b/myenv/bin/distro @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from distro.distro import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/dotenv b/myenv/bin/dotenv new file mode 100755 index 0000000..58c8380 --- /dev/null +++ b/myenv/bin/dotenv @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from dotenv.__main__ import cli +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli()) diff --git a/myenv/bin/f2py b/myenv/bin/f2py new file mode 100755 index 0000000..f012c20 --- /dev/null +++ b/myenv/bin/f2py @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from numpy.f2py.f2py2e import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/httpx b/myenv/bin/httpx new file mode 100755 index 0000000..f2c8d8f --- /dev/null +++ b/myenv/bin/httpx @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from httpx import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/jsondiff b/myenv/bin/jsondiff new file mode 100755 index 0000000..5c6c55c --- /dev/null +++ b/myenv/bin/jsondiff @@ -0,0 +1,41 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import json +import jsonpatch +import argparse + + +parser = argparse.ArgumentParser(description='Diff two JSON files') +parser.add_argument('FILE1', type=argparse.FileType('r')) +parser.add_argument('FILE2', type=argparse.FileType('r')) +parser.add_argument('--indent', type=int, default=None, + help='Indent output by n spaces') +parser.add_argument('-u', '--preserve-unicode', action='store_true', + help='Output Unicode character as-is without using Code Point') +parser.add_argument('-v', '--version', action='version', + version='%(prog)s ' + jsonpatch.__version__) + + +def main(): + try: + diff_files() + except KeyboardInterrupt: + sys.exit(1) + + +def diff_files(): + """ Diffs two JSON files and prints a patch """ + args = parser.parse_args() + doc1 = json.load(args.FILE1) + doc2 = json.load(args.FILE2) + patch = jsonpatch.make_patch(doc1, doc2) + if patch.patch: + print(json.dumps(patch.patch, indent=args.indent, ensure_ascii=not(args.preserve_unicode))) + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/myenv/bin/jsonpatch b/myenv/bin/jsonpatch new file mode 100755 index 0000000..2519ceb --- /dev/null +++ b/myenv/bin/jsonpatch @@ -0,0 +1,107 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- + +import sys +import os.path +import json +import jsonpatch +import tempfile +import argparse + + +parser = argparse.ArgumentParser( + description='Apply a JSON patch on a JSON file') +parser.add_argument('ORIGINAL', type=argparse.FileType('r'), + help='Original file') +parser.add_argument('PATCH', type=argparse.FileType('r'), + nargs='?', default=sys.stdin, + help='Patch file (read from stdin if omitted)') +parser.add_argument('--indent', type=int, default=None, + help='Indent output by n spaces') +parser.add_argument('-b', '--backup', action='store_true', + help='Back up ORIGINAL if modifying in-place') +parser.add_argument('-i', '--in-place', action='store_true', + help='Modify ORIGINAL in-place instead of to stdout') +parser.add_argument('-v', '--version', action='version', + version='%(prog)s ' + jsonpatch.__version__) +parser.add_argument('-u', '--preserve-unicode', action='store_true', + help='Output Unicode character as-is without using Code Point') + +def main(): + try: + patch_files() + except KeyboardInterrupt: + sys.exit(1) + + +def patch_files(): + """ Diffs two JSON files and prints a patch """ + args = parser.parse_args() + doc = json.load(args.ORIGINAL) + patch = json.load(args.PATCH) + result = jsonpatch.apply_patch(doc, patch) + + if args.in_place: + dirname = os.path.abspath(os.path.dirname(args.ORIGINAL.name)) + + try: + # Attempt to replace the file atomically. We do this by + # creating a temporary file in the same directory as the + # original file so we can atomically move the new file over + # the original later. (This is done in the same directory + # because atomic renames do not work across mount points.) + + fd, pathname = tempfile.mkstemp(dir=dirname) + fp = os.fdopen(fd, 'w') + atomic = True + + except OSError: + # We failed to create the temporary file for an atomic + # replace, so fall back to non-atomic mode by backing up + # the original (if desired) and writing a new file. + + if args.backup: + os.rename(args.ORIGINAL.name, args.ORIGINAL.name + '.orig') + fp = open(args.ORIGINAL.name, 'w') + atomic = False + + else: + # Since we're not replacing the original file in-place, write + # the modified JSON to stdout instead. + + fp = sys.stdout + + # By this point we have some sort of file object we can write the + # modified JSON to. + + json.dump(result, fp, indent=args.indent, ensure_ascii=not(args.preserve_unicode)) + fp.write('\n') + + if args.in_place: + # Close the new file. If we aren't replacing atomically, this + # is our last step, since everything else is already in place. + + fp.close() + + if atomic: + try: + # Complete the atomic replace by linking the original + # to a backup (if desired), fixing up the permissions + # on the temporary file, and moving it into place. + + if args.backup: + os.link(args.ORIGINAL.name, args.ORIGINAL.name + '.orig') + os.chmod(pathname, os.stat(args.ORIGINAL.name).st_mode) + os.rename(pathname, args.ORIGINAL.name) + + except OSError: + # In the event we could not actually do the atomic + # replace, unlink the original to move it out of the + # way and finally move the temporary file into place. + + os.unlink(args.ORIGINAL.name) + os.rename(pathname, args.ORIGINAL.name) + + +if __name__ == "__main__": + main() diff --git a/myenv/bin/jsonpointer b/myenv/bin/jsonpointer new file mode 100755 index 0000000..e40be34 --- /dev/null +++ b/myenv/bin/jsonpointer @@ -0,0 +1,69 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import sys +import os.path +import json +import jsonpointer +import argparse + + +parser = argparse.ArgumentParser( + description='Resolve a JSON pointer on JSON files') + +# Accept pointer as argument or as file +ptr_group = parser.add_mutually_exclusive_group(required=True) + +ptr_group.add_argument('-f', '--pointer-file', type=argparse.FileType('r'), + nargs='?', + help='File containing a JSON pointer expression') + +ptr_group.add_argument('POINTER', type=str, nargs='?', + help='A JSON pointer expression') + +parser.add_argument('FILE', type=argparse.FileType('r'), nargs='+', + help='Files for which the pointer should be resolved') +parser.add_argument('--indent', type=int, default=None, + help='Indent output by n spaces') +parser.add_argument('-v', '--version', action='version', + version='%(prog)s ' + jsonpointer.__version__) + + +def main(): + try: + resolve_files() + except KeyboardInterrupt: + sys.exit(1) + + +def parse_pointer(args): + if args.POINTER: + ptr = args.POINTER + elif args.pointer_file: + ptr = args.pointer_file.read().strip() + else: + parser.print_usage() + sys.exit(1) + + return ptr + + +def resolve_files(): + """ Resolve a JSON pointer on JSON files """ + args = parser.parse_args() + + ptr = parse_pointer(args) + + for f in args.FILE: + doc = json.load(f) + try: + result = jsonpointer.resolve_pointer(doc, ptr) + print(json.dumps(result, indent=args.indent)) + except jsonpointer.JsonPointerException as e: + print('Could not resolve pointer: %s' % str(e), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/myenv/bin/langchain-server b/myenv/bin/langchain-server new file mode 100755 index 0000000..968496a --- /dev/null +++ b/myenv/bin/langchain-server @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from langchain.server import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/langsmith b/myenv/bin/langsmith new file mode 100755 index 0000000..eb40767 --- /dev/null +++ b/myenv/bin/langsmith @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from langsmith.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/nltk b/myenv/bin/nltk new file mode 100755 index 0000000..635e2ce --- /dev/null +++ b/myenv/bin/nltk @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from nltk.cli import cli +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli()) diff --git a/myenv/bin/normalizer b/myenv/bin/normalizer new file mode 100755 index 0000000..a0147bd --- /dev/null +++ b/myenv/bin/normalizer @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from charset_normalizer.cli import cli_detect +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(cli_detect()) diff --git a/myenv/bin/openai b/myenv/bin/openai new file mode 100755 index 0000000..b2ccffb --- /dev/null +++ b/myenv/bin/openai @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from openai.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/pip b/myenv/bin/pip new file mode 100755 index 0000000..a8242c4 --- /dev/null +++ b/myenv/bin/pip @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/pip3 b/myenv/bin/pip3 new file mode 100755 index 0000000..a8242c4 --- /dev/null +++ b/myenv/bin/pip3 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/pip3.10 b/myenv/bin/pip3.10 new file mode 100755 index 0000000..a8242c4 --- /dev/null +++ b/myenv/bin/pip3.10 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/pip3.11 b/myenv/bin/pip3.11 new file mode 100755 index 0000000..a8242c4 --- /dev/null +++ b/myenv/bin/pip3.11 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/python b/myenv/bin/python new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/myenv/bin/python @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/myenv/bin/python3 b/myenv/bin/python3 new file mode 120000 index 0000000..79ab74b --- /dev/null +++ b/myenv/bin/python3 @@ -0,0 +1 @@ +/usr/local/bin/python3 \ No newline at end of file diff --git a/myenv/bin/python3.11 b/myenv/bin/python3.11 new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/myenv/bin/python3.11 @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/myenv/bin/tqdm b/myenv/bin/tqdm new file mode 100755 index 0000000..5416bfe --- /dev/null +++ b/myenv/bin/tqdm @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from tqdm.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/bin/wheel b/myenv/bin/wheel new file mode 100755 index 0000000..fc60d39 --- /dev/null +++ b/myenv/bin/wheel @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from wheel.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/myenv/include/site/python3.11/greenlet/greenlet.h b/myenv/include/site/python3.11/greenlet/greenlet.h new file mode 100644 index 0000000..d02a16e --- /dev/null +++ b/myenv/include/site/python3.11/greenlet/greenlet.h @@ -0,0 +1,164 @@ +/* -*- indent-tabs-mode: nil; tab-width: 4; -*- */ + +/* Greenlet object interface */ + +#ifndef Py_GREENLETOBJECT_H +#define Py_GREENLETOBJECT_H + + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* This is deprecated and undocumented. It does not change. */ +#define GREENLET_VERSION "1.0.0" + +#ifndef GREENLET_MODULE +#define implementation_ptr_t void* +#endif + +typedef struct _greenlet { + PyObject_HEAD + PyObject* weakreflist; + PyObject* dict; + implementation_ptr_t pimpl; +} PyGreenlet; + +#define PyGreenlet_Check(op) (op && PyObject_TypeCheck(op, &PyGreenlet_Type)) + + +/* C API functions */ + +/* Total number of symbols that are exported */ +#define PyGreenlet_API_pointers 12 + +#define PyGreenlet_Type_NUM 0 +#define PyExc_GreenletError_NUM 1 +#define PyExc_GreenletExit_NUM 2 + +#define PyGreenlet_New_NUM 3 +#define PyGreenlet_GetCurrent_NUM 4 +#define PyGreenlet_Throw_NUM 5 +#define PyGreenlet_Switch_NUM 6 +#define PyGreenlet_SetParent_NUM 7 + +#define PyGreenlet_MAIN_NUM 8 +#define PyGreenlet_STARTED_NUM 9 +#define PyGreenlet_ACTIVE_NUM 10 +#define PyGreenlet_GET_PARENT_NUM 11 + +#ifndef GREENLET_MODULE +/* This section is used by modules that uses the greenlet C API */ +static void** _PyGreenlet_API = NULL; + +# define PyGreenlet_Type \ + (*(PyTypeObject*)_PyGreenlet_API[PyGreenlet_Type_NUM]) + +# define PyExc_GreenletError \ + ((PyObject*)_PyGreenlet_API[PyExc_GreenletError_NUM]) + +# define PyExc_GreenletExit \ + ((PyObject*)_PyGreenlet_API[PyExc_GreenletExit_NUM]) + +/* + * PyGreenlet_New(PyObject *args) + * + * greenlet.greenlet(run, parent=None) + */ +# define PyGreenlet_New \ + (*(PyGreenlet * (*)(PyObject * run, PyGreenlet * parent)) \ + _PyGreenlet_API[PyGreenlet_New_NUM]) + +/* + * PyGreenlet_GetCurrent(void) + * + * greenlet.getcurrent() + */ +# define PyGreenlet_GetCurrent \ + (*(PyGreenlet * (*)(void)) _PyGreenlet_API[PyGreenlet_GetCurrent_NUM]) + +/* + * PyGreenlet_Throw( + * PyGreenlet *greenlet, + * PyObject *typ, + * PyObject *val, + * PyObject *tb) + * + * g.throw(...) + */ +# define PyGreenlet_Throw \ + (*(PyObject * (*)(PyGreenlet * self, \ + PyObject * typ, \ + PyObject * val, \ + PyObject * tb)) \ + _PyGreenlet_API[PyGreenlet_Throw_NUM]) + +/* + * PyGreenlet_Switch(PyGreenlet *greenlet, PyObject *args) + * + * g.switch(*args, **kwargs) + */ +# define PyGreenlet_Switch \ + (*(PyObject * \ + (*)(PyGreenlet * greenlet, PyObject * args, PyObject * kwargs)) \ + _PyGreenlet_API[PyGreenlet_Switch_NUM]) + +/* + * PyGreenlet_SetParent(PyObject *greenlet, PyObject *new_parent) + * + * g.parent = new_parent + */ +# define PyGreenlet_SetParent \ + (*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \ + _PyGreenlet_API[PyGreenlet_SetParent_NUM]) + +/* + * PyGreenlet_GetParent(PyObject* greenlet) + * + * return greenlet.parent; + * + * This could return NULL even if there is no exception active. + * If it does not return NULL, you are responsible for decrementing the + * reference count. + */ +# define PyGreenlet_GetParent \ + (*(PyGreenlet* (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_GET_PARENT_NUM]) + +/* + * deprecated, undocumented alias. + */ +# define PyGreenlet_GET_PARENT PyGreenlet_GetParent + +# define PyGreenlet_MAIN \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_MAIN_NUM]) + +# define PyGreenlet_STARTED \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_STARTED_NUM]) + +# define PyGreenlet_ACTIVE \ + (*(int (*)(PyGreenlet*)) \ + _PyGreenlet_API[PyGreenlet_ACTIVE_NUM]) + + + + +/* Macro that imports greenlet and initializes C API */ +/* NOTE: This has actually moved to ``greenlet._greenlet._C_API``, but we + keep the older definition to be sure older code that might have a copy of + the header still works. */ +# define PyGreenlet_Import() \ + { \ + _PyGreenlet_API = (void**)PyCapsule_Import("greenlet._C_API", 0); \ + } + +#endif /* GREENLET_MODULE */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GREENLETOBJECT_H */ diff --git a/myenv/lib64 b/myenv/lib64 new file mode 120000 index 0000000..7951405 --- /dev/null +++ b/myenv/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/myenv/pyvenv.cfg b/myenv/pyvenv.cfg new file mode 100644 index 0000000..4877808 --- /dev/null +++ b/myenv/pyvenv.cfg @@ -0,0 +1,5 @@ +home = /usr/local/bin +include-system-site-packages = false +version = 3.11.0 +executable = /usr/local/bin/python3.11 +command = /usr/local/bin/python3 -m venv /home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/myenv diff --git a/path/to/your/virtualenv/bin/Activate.ps1 b/path/to/your/virtualenv/bin/Activate.ps1 new file mode 100644 index 0000000..b49d77b --- /dev/null +++ b/path/to/your/virtualenv/bin/Activate.ps1 @@ -0,0 +1,247 @@ +<# +.Synopsis +Activate a Python virtual environment for the current PowerShell session. + +.Description +Pushes the python executable for a virtual environment to the front of the +$Env:PATH environment variable and sets the prompt to signify that you are +in a Python virtual environment. Makes use of the command line switches as +well as the `pyvenv.cfg` file values present in the virtual environment. + +.Parameter VenvDir +Path to the directory that contains the virtual environment to activate. The +default value for this is the parent of the directory that the Activate.ps1 +script is located within. + +.Parameter Prompt +The prompt prefix to display when this virtual environment is activated. By +default, this prompt is the name of the virtual environment folder (VenvDir) +surrounded by parentheses and followed by a single space (ie. '(.venv) '). + +.Example +Activate.ps1 +Activates the Python virtual environment that contains the Activate.ps1 script. + +.Example +Activate.ps1 -Verbose +Activates the Python virtual environment that contains the Activate.ps1 script, +and shows extra information about the activation as it executes. + +.Example +Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv +Activates the Python virtual environment located in the specified location. + +.Example +Activate.ps1 -Prompt "MyPython" +Activates the Python virtual environment that contains the Activate.ps1 script, +and prefixes the current prompt with the specified string (surrounded in +parentheses) while the virtual environment is active. + +.Notes +On Windows, it may be required to enable this Activate.ps1 script by setting the +execution policy for the user. You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. + +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. + if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. +$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" diff --git a/path/to/your/virtualenv/bin/activate b/path/to/your/virtualenv/bin/activate new file mode 100644 index 0000000..e28c6fd --- /dev/null +++ b/path/to/your/virtualenv/bin/activate @@ -0,0 +1,69 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1="(virtualenv) ${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT="(virtualenv) " + export VIRTUAL_ENV_PROMPT +fi + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null +fi diff --git a/path/to/your/virtualenv/bin/activate.csh b/path/to/your/virtualenv/bin/activate.csh new file mode 100644 index 0000000..41c7753 --- /dev/null +++ b/path/to/your/virtualenv/bin/activate.csh @@ -0,0 +1,26 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . +# Ported to Python 3.3 venv by Andrew Svetlov + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" + +set _OLD_VIRTUAL_PATH="$PATH" +setenv PATH "$VIRTUAL_ENV/bin:$PATH" + + +set _OLD_VIRTUAL_PROMPT="$prompt" + +if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then + set prompt = "(virtualenv) $prompt" + setenv VIRTUAL_ENV_PROMPT "(virtualenv) " +endif + +alias pydoc python -m pydoc + +rehash diff --git a/path/to/your/virtualenv/bin/activate.fish b/path/to/your/virtualenv/bin/activate.fish new file mode 100644 index 0000000..54225c9 --- /dev/null +++ b/path/to/your/virtualenv/bin/activate.fish @@ -0,0 +1,66 @@ +# This file must be used with "source /bin/activate.fish" *from fish* +# (https://fishshell.com/); you cannot run it directly. + +function deactivate -d "Exit virtual environment and return to normal shell environment" + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + functions -e fish_prompt + set -e _OLD_FISH_PROMPT_OVERRIDE + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + end + + set -e VIRTUAL_ENV + set -e VIRTUAL_ENV_PROMPT + if test "$argv[1]" != "nondestructive" + # Self-destruct! + functions -e deactivate + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/bin" $PATH + +# Unset PYTHONHOME if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # fish uses a function instead of an env var to generate the prompt. + + # Save the current fish_prompt function as the function _old_fish_prompt. + functions -c fish_prompt _old_fish_prompt + + # With the original prompt function renamed, we can override with our own. + function fish_prompt + # Save the return status of the last command. + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. + printf "%s%s%s" (set_color 4B8BBE) "(virtualenv) " (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . + # Output the original/"old" prompt. + _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" + set -gx VIRTUAL_ENV_PROMPT "(virtualenv) " +end diff --git a/path/to/your/virtualenv/bin/pip b/path/to/your/virtualenv/bin/pip new file mode 100755 index 0000000..3944467 --- /dev/null +++ b/path/to/your/virtualenv/bin/pip @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3 b/path/to/your/virtualenv/bin/pip3 new file mode 100755 index 0000000..3944467 --- /dev/null +++ b/path/to/your/virtualenv/bin/pip3 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3.10 b/path/to/your/virtualenv/bin/pip3.10 new file mode 100755 index 0000000..3944467 --- /dev/null +++ b/path/to/your/virtualenv/bin/pip3.10 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3.11 b/path/to/your/virtualenv/bin/pip3.11 new file mode 100755 index 0000000..3944467 --- /dev/null +++ b/path/to/your/virtualenv/bin/pip3.11 @@ -0,0 +1,8 @@ +#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/python b/path/to/your/virtualenv/bin/python new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/path/to/your/virtualenv/bin/python @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/bin/python3 b/path/to/your/virtualenv/bin/python3 new file mode 120000 index 0000000..79ab74b --- /dev/null +++ b/path/to/your/virtualenv/bin/python3 @@ -0,0 +1 @@ +/usr/local/bin/python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/bin/python3.11 b/path/to/your/virtualenv/bin/python3.11 new file mode 120000 index 0000000..b8a0adb --- /dev/null +++ b/path/to/your/virtualenv/bin/python3.11 @@ -0,0 +1 @@ +python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/lib64 b/path/to/your/virtualenv/lib64 new file mode 120000 index 0000000..7951405 --- /dev/null +++ b/path/to/your/virtualenv/lib64 @@ -0,0 +1 @@ +lib \ No newline at end of file diff --git a/path/to/your/virtualenv/pyvenv.cfg b/path/to/your/virtualenv/pyvenv.cfg new file mode 100644 index 0000000..e7250de --- /dev/null +++ b/path/to/your/virtualenv/pyvenv.cfg @@ -0,0 +1,5 @@ +home = /usr/local/bin +include-system-site-packages = false +version = 3.11.0 +executable = /usr/local/bin/python3.11 +command = /usr/local/bin/python3 -m venv /home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv From abbf9a068f4137b58277b16eb153f7e1a8ffd111 Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Mon, 25 Mar 2024 23:13:51 +0200 Subject: [PATCH 4/6] Enhanced the output reduction Created new output scenarious in the config.json file for NaN check, struct fields check, deadlock check and data races check Created the reduce_output field in the config.json file to tell the LLM to produce a smaller output Created new regex patterns in the reduce_output2() function Created the remove_patterns_nltk that uses NLP methods to identify patterns in a sequence of tokenized words. The advantage of it are that it provides more granular control over text sentences, it can be more precise in some situations. Created the test_output_reducer file that tests the functionality of the esbmc_output_optimisation(), reduce_output2() and remove_patterns_nltk() functions that covers all the string patterns Instantiated the GPT_4_TURBO_PREVIEW model in the enum of the ai_models.py and used it for various code bases. Signed-off-by: mihai.state --- DOCUMENTATION.md | 5 + config.json | 87 ++++++++++++-- esbmc_ai/__main__.py | 2 +- esbmc_ai/ai_models.py | 1 + esbmc_ai/esbmc_util.py | 213 ++++++++++++++++++++++------------- tests/test_output_reducer.py | 174 ++++++++++++++++++++++++++++ 6 files changed, 395 insertions(+), 87 deletions(-) create mode 100644 DOCUMENTATION.md create mode 100644 tests/test_output_reducer.py diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md new file mode 100644 index 0000000..5e3908c --- /dev/null +++ b/DOCUMENTATION.md @@ -0,0 +1,5 @@ +# Contribution + +I created three functions for reducing the output of ESBMC-AI by using regex patterns and the nltk library. Thes functions are created in the esbmc_ai/esbmc_util.py file, with the names esbmc_ouput_optimisation(), reduce_output2() and remove_patterns_nltk() and called in the esbmc() function. They work successfully on reducing the ESBMC-AI output with the line patterns that are created inside them. All of the patterns have been tested on varios ESBMC-Ai outputs and they work well. Besides that, I created more output scenarious in the config.json file: null pointer, arthmetic overflow, buffer overflow, struct-fileds check, NaN check, array out-of-bounds, division-by-zero + +To run the test file that I created, go to the esbmc_ai_fork/esbmc-ai/ folder and run this command python -m tests.test_output_reducer \ No newline at end of file diff --git a/config.json b/config.json index e855782..a29176c 100644 --- a/config.json +++ b/config.json @@ -28,7 +28,7 @@ "system": [ { "role": "System", - "content": "You are a security focused assistant that parses output from a program called ESBMC and explains the output to the user. ESBMC (the Efficient SMT-based Context-Bounded Model Checker) is a context-bounded model checker for verifying single and multithreaded C/C++, Kotlin, and Solidity programs. It can automatically verify both predefined safety properties (e.g., bounds check, pointer safety, overflow) and user-defined program assertions. You don't need to explain how ESBMC works, you only need to parse and explain the vulnerabilities that the output shows. For each line of code explained, say what the line number is as well. Do not answer any questions outside of these explicit parameters. If the initial output exceeds the token limit of the respective LLM model, remove the sequences (sentences or parts of sentences) that are related to any kind of assertions, assumptions, time measurements, that are in seconds, to the GOTO program, to the Boolector solver, bitvector, floating-point arithmetic, remaining VCCs and try to use less words to explain something if possible, create a larger sentence instead of two ones that have a direct corelation if result is not a very large one. Do not put time measurements (that are in seconds) in the output(e.g ., Symex completed in: 0.018s), like encoding time, runtime, slicing time, runtime decision procedure, do not use explanations for slicing. Avoid even mentioning any time measurements. If you understand, reply OK." + "content": "You are a security focused assistant that parses output from a program called ESBMC and explains the output to the user. ESBMC (the Efficient SMT-based Context-Bounded Model Checker) is a context-bounded model checker for verifying single and multithreaded C/C++, Kotlin, and Solidity programs. It can automatically verify both predefined safety properties (e.g., bounds check, pointer safety, overflow) and user-defined program assertions. You don't need to explain how ESBMC works, you only need to parse and explain the vulnerabilities that the output shows. For each line of code explained, say what the line number is as well. Do not answer any questions outside of these explicit parameters. If the initial output exceeds the token limit of the respective LLM model, reduce it by making shorter phrases. Also, remove the sequences (sentences or parts of sentences) that are related to any kind of assertions, assumptions, time measurements, that are in seconds, to the GOTO program, to the Boolector solver, bitvector, floating-point arithmetic, remaining VCCs and try to use less words to explain something if possible, create a larger sentence instead of two ones that have a direct corelation if result is not a very large one. Do not put time measurements (that are in seconds) in the output(e.g ., Symex completed in: 0.018s), like encoding time, runtime, slicing time, runtime decision procedure, do not use explanations for slicing. Avoid even mentioning any time measurements. Avoid mentioning warnings produced by the ESBMC output. If you understand, reply OK." }, { "role": "AI", @@ -64,10 +64,35 @@ } ] }, - "generate_solution": { "temperature": 1.3, "scenarios": { + "null pointer":{ + "system":[ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a null pointer issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a null pointer might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the null pointer. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles mull pointers effectively.\nGuidelines: Always initialize pointers: Uninitialized pointers may cause a segmentation fault or can point to some arbitrary memory location. Use null pointers to represent 'no object' or 'unknown'. Free memory properly: Once the memory pointed by a pointer is freed, set the pointer to null. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the null pointer vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + + }, + "nan check": { + "system": [ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a floating point for a NaN issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a floating point for NaN might occur. The solution should prevent undefined behaviour, maintain program stability, enhance error reporting, improve data integrity, ensure correctness of results. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles floating points for NaN effectively.\nGuidelines: Use specific functions, such as isnan() in C/C++. Report an error or warning message to the user or logging system for NaN values. Provide default or fallback values to continue with calculations. Adjust the algorithm or computation to avoid generatign NaN values. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the flaoting point NaN vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + }, + "division by zero": { "system": [ { @@ -91,8 +116,21 @@ "content": "OK." } ] - } - , + }, + "struct-fields-check":{ + "system": [ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a struct-fields-check issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where over-sized reads for field-check might occur. The solution should prevent uninitialized fields, memory issues, ensure validation and ease of maintenance, and keep data consistency. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behavior and handles struct-fields effectively.\nGuidelines: Implement assertions for the array indices. Output: Provide the corrected, complete C code. The solution should compile and run error-free, addressing the struct-fields vulnerability.\nStart the code snippet with ```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + + }, + "arithmetic overflow":{ "system":[ { @@ -128,10 +166,30 @@ "content": "OK." } ] + }, + "deadlock-check":{ + "system":[ + { + "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a memory leak issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a deadlock might occur. The solution should prevent unexpected behaviour, chrases, security vulnerabilities, performance issues and maintainability issues.\nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids data races and can handle them effectively in the same time.\nGuidelines: Implement methods that do proper resource allocation to prevent circular wait conditions, create a deadlock detection mechanism, so the system can take steps to recover from it, or, alternatively, use the Ostrich Algorithm, which ignores the problem on the assumption that it occurs rarely, and, if it happens, the system is rebooted. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the deadlock vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] + }, + "data-races-check":{ + "system":[ + { "role": "System", + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a data race issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a data race might occur. The solution should prevent data races, detect them and do a system recovery, remove the resources from processes and give them to others until deadlock is broken. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids data races and can handle them effectively in the same time.\nGuidelines: Implement methods that blocks read access until the write operation has been completed, use a mutex to allow multiple program threads to share the same resource, such as file file access, but not simultaneously, or perform lock-free programming, create atomic operations, do thread-local storage. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the data-race vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK." + } + ] } - }, - "null pointer":{ - }, "system": [ { @@ -182,6 +240,19 @@ "--max-k-step", "10" ] - } + }, + "reduce_output": { + "temperature": 1.0, + "system": [ + { + "role": "System", + "content": "You are an output reducer. You are given the ESBMC output interpretation. The reduced output that you generate needs to have less lines than the original. From this point on, you can only reply in the output. You shall only output the new produced about by the LLM, that is only smaller and more concise than the initial one. Reply OK if you understand." + }, + { + "role": "AI", + "content": "OK" + } + ] + } } } \ No newline at end of file diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index ce3165b..47d017f 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -89,7 +89,7 @@ def check_health() -> None: else: print(f"Error: ESBMC could not be found in {config.esbmc_path}") sys.exit(3) - + def print_assistant_response( chat: UserChat, diff --git a/esbmc_ai/ai_models.py b/esbmc_ai/ai_models.py index e0f718e..8e2cc0b 100644 --- a/esbmc_ai/ai_models.py +++ b/esbmc_ai/ai_models.py @@ -259,6 +259,7 @@ class AIModels(Enum): GPT_3_16K = AIModelOpenAI(name="gpt-3.5-turbo-16k", tokens=16384) GPT_4 = AIModelOpenAI(name="gpt-4", tokens=8192) GPT_4_32K = AIModelOpenAI(name="gpt-4-32k", tokens=32768) + GPT_4_TURBO_PREVIEW= AIModelOpenAI(name="gpt-4-turbo-preview", tokens=128000) FALCON_7B = AIModelTextGen( name="falcon-7b", tokens=8192, diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 1fda085..ef90a07 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -6,14 +6,11 @@ from subprocess import Popen, PIPE, STDOUT from pathlib import Path import esbmc_ai.config as config -from esbmc_ai.ai_models import _ai_model_names from . import config import nltk from nltk.tokenize import word_tokenize,sent_tokenize +nltk.download('punkt') -def optimize_output(output): - print("Optimiziiiiiingg aaaaaa") - return output def esbmc(path: str, esbmc_params: list): """Exit code will be 0 if verification successful, 1 if verification @@ -30,123 +27,183 @@ def esbmc(path: str, esbmc_params: list): exit_code = process.wait() output: str = str(output_bytes).replace("\\n", "\n") err: str = str(err_bytes).replace("\\n", "\n") - #output = optimize_output(output) - # print("HELLO before the call") output = esbmc_output_optimisation(output) - # print("HELLO after the code") - #sys.stdout.flush() + output = reduce_output2(output) + output = remove_patterns_nltk(output) + return exit_code, output, err def esbmc_output_optimisation(esbmc_output:str) -> str: - esbmc_output =re.sub(r'^\d+. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'^\d+. This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'(^\d+\.\s*)?The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'(^\d+\.\s*)?This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes - esbmc_output = re.sub(r'\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b', '', esbmc_output) # Remove hex patterns + + esbmc_output = re.sub(r'\(*\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b\)*', '', esbmc_output) # Remove hex patterns esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) # Remove Boolector lines esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output, flags=re.MULTILINE) # Remove solver time lines - esbmc_output = re.sub(r'.*Boolector.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output =re.sub(r'/\*.*?\*/', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*Boolector.*\n\n', '', esbmc_output, flags=re.MULTILINE) pattern = r"- `.*Slicing time: \d+\.\d+s \(removed \d+ assignments\)`.*: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis." esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\n\*s\n', '\n', esbmc_output) + + esbmc_output = re.sub(r'\n\s*\n', '\n', esbmc_output) esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") - #esbmc_output = re.sub(r'.*runtime decision.*', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'The time.*', '', esbmc_output, flags=re.MULTILINE) - #VCCs represent the assertions in the code that need to be verified - esbmc_output = re.sub(r'VCCs represent.*', '', esbmc_output, flags=re.MULTILINE) - #if(config.ai_model.name != "gpt-4-32K"): - #ESBMC then encodes the remaining VCCs using bitvector and floating-point arithmetic, and displays the time taken for this encoding process. - esbmc_output = re.sub(r'.*remaining VCCs.*', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'[^.]VCCs represent.*|[^.]These VCCs represent.*', '', esbmc_output, flags=re.MULTILINE) #These types of lines represent some sequences of VCCs that can easily be ommitted for development purposes esbmc_output = re.sub(r'.*bitvector.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\s*\(\s*`thread\d+`(?:,\s*`thread\d+`)*\s*\)', '', esbmc_output) + esbmc_output = re.sub(r'.*floating-point arithmetic.*', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'.*encoding.*', '', esbmc_output, flags = re.MULTILINE) - esbmc_output = re.sub(r'.*GOTO program.*', '', esbmc_output, flags=re.MULTILINE) - #Next, ESBMC states that it is starting the runtime decision procedure. This is the step where ESBMC makes concrete assignments to the program variables to check the property being verified. - #It creates VCCs (verification conditions) and starts encoding them to a solver to solve the constraints. - #ESBMC encodes these VCCs to a solver, and in this case, the encoding time is shown as 0.019s. - #ESBMC encodes these VCCs to a solver, and in this case, the encoding time is shown as 0.019s. ESBMC then uses a runtime decision procedure to solve the encoded formulas, which takes 1.351s for this analysis. The decision procedure involves executing various branches of the program symbolically and making decisions based on constraints and conditions. esbmc_output = re.sub(r'.*encoding time.*', '', esbmc_output, flags= re.MULTILINE) - esbmc_output = re.sub(r', which takes \d+\.\d+s \.*', '.', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'The output indicates that ESBMC generates a verification condition \(VCC\) for each possible path and encodes them into a solver\.', '', esbmc_output, flags= re.MULTILINE) - # esbmc_output = re.sub(r'In this case, the decision procedure took \d+\.\d+s \. seconds.','', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r', which takes \d+\.\d+s\.*', '.', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'The output indicates that ESBMC generates a verification condition \(VCC\) for each possible path and encodes them into a solver\.', '', esbmc_output, flags= re.MULTILINE) #This information can be disregarded for development purposes esbmc_output = re.sub(r'Measures the [^.]+?encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'In this case, the procedure took [0-9.]+ seconds\.', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'\s*It measures[^.]+? encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\.\s*\n', '.\n', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*? GOTO Program[^.]*\.\s*', '', esbmc_output) - esbmc_output = re.sub(r'.*? GOTO program[^.]*\.\s*', '', esbmc_output) + esbmc_output = re.sub(r'[^.]*GOTO program[^.]*\.', '', esbmc_output) + esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*time.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*.*time.*\n\n|\.\s*[^.]*time[^.]*\.', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'.*program time: \d+\.\d+s \.', '',esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'"Runtime decision procedure: \d+\. \d+s" \.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time: \d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) - esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time: \d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) - esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*Symex.*|.*symex.*|.*symbolic execution.*', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time:\d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) + esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s\"*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*Symex.*|.*symex.*|.*symbolic execution.*\n\n', '', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'After[^.]*?, the code', 'The program then', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'.*assignments.*|.*assignment.*', '', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'.*iteratively.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*branches.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*analysis process.*\s','', esbmc_output, flags=re.MULTILINE ) + + esbmc_output = re.sub(r'.*branches.*', '', esbmc_output, flags=re.MULTILINE) #The informations about branches are not necessary relevant to the understanding of bugs + esbmc_output = re.sub(r'.*analysis process.*\s','', esbmc_output, flags=re.MULTILINE ) esbmc_output = re.sub(r'.*remaining VCC.*\s|.*remaining VCCs.*\s','', esbmc_output) - esbmc_output = re.sub(r'.*0 VCCs.*', '', esbmc_output, flags= re.MULTILINE) # if there are no more VCCs left, the user is not going to take action in this regard. - esbmc_output = re.sub(r'Finally, ESBMC confirms that the verfication[^.]"VERIFICATION SUCCESSFUL."', 'Finally, ESBMC indicates a successful verification.', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'.*0 VCCs.*', '', esbmc_output, flags= re.MULTILINE) # If there are no more VCCs left, the user is not going to take action in this regard. + esbmc_output = re.sub(r'Finally, ESBMC confirms that the verfication.*"VERIFICATION SUCCESSFUL."', 'Finally, ESBMC indicates a successful verification.', esbmc_output, flags=re.MULTILINE) esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*.*"Symex completed in:.*".*\s', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*These warnings are about.*', '', esbmc_output, flags=re.MULTILINE) #Warning information are not useful for debugging purposes + + esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'Let\'s[^.]the[^.]code and explain the relevant parts of the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'virtual_table::std::.*', '', esbmc_output, flags=re.MULTILINE) - ##assigments verification + esbmc_output = re.sub(r'foo =.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'the .*Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'(Efficient SMT-Based Bounded Model Checker)', '', esbmc_output, flags=re.MULTILINE) + return esbmc_output -def reduce_output2(esbmc_output: str) -> str: - - esbmc_output = re.sub(r'\.\s*[^.]*symbolically execute the program[^.]*\.', '', esbmc_output, flags=re.MULTILINE) +def reduce_output2(esbmc_output: str) -> str: + + esbmc_output = re.sub(r'\.\s*[^.]*symbolically execute the program[^.]*\.', '', esbmc_output, flags=re.MULTILINE) #Reffers to a time measurement + esbmc_output = re.sub('In this case, it initially starts with', 'It starts with', esbmc_output, flags= re.MULTILINE) + esbmc_output = re.sub(r'(.*)\s*before moving forward', r'\1', esbmc_output, flags=re.MULTILINE) #removes the specified sequence, as it is trivial + + esbmc_output = re.sub(r'.*k=0.*', '', esbmc_output, flags=re.MULTILINE) # for k=0, which is the base case, there is no need for an explanation for development purposes, the user would not find this information helpful + esbmc_output = re.sub(r'the Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r', which stands for Efficient SMT-Based Countext-Bounded Model Checker', '.', esbmc_output, flags=re.MULTILINE ) + + esbmc_output = re.sub(r'.*Warnings.*|.*warnings.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*\*\*Warnings\*\*.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*warning.*', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub('.*A series of warnings.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'ESBMC \(the Efficient SMT-based Context-Bounded Model Checker\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'These messages relate how ESBMC handles loops.*\.|These messages relate.*\.', '', esbmc_output, flags=re.MULTILINE) #These type of lines can be ommitted, if there is anything relevant about loops, ESBMC-AI can display other more precise explanations + + esbmc_output = re.sub(r'which, in simple terms,', 'which', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'After[^.]*iterations[^.]*\.', '', esbmc_output, flags=re.MULTILINE) #These types of line can be removed to make the output easier to read, without ommitting any important information + esbmc_output = re.sub(r'[^.]*iterations[^.]*\.', '.', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'Initially, ESBMC parses the given source file. It identifies', 'ESBMC identifies', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*.*parsing the source file.*|\.\s*.parses the source file.*|\s*.*parsing the source file.*|\s*.*starts by parsing the program.*|\s*.*starts by parsing the.*', '', esbmc_output, flags=re.MULTILINE) # this is an additional information that ESBMC-AI provides, it is not useful for understanding the behaviour of the tested code + esbmc_output = re.sub(r'\*\*Parsing and Compilation Warnings:\*\*', '', esbmc_output, flags=re.MULTILINE) #This represents a type of header in the output, which is followed by an explanatory paragraph, its removal doesn't affect the understandability of the parsing and warning explanations + + esbmc_output = re.sub(r'### Conclusion.*(?:\n{2,}|$)|### In Conclusion.*(?:\n{2,}|$)', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'\*\*Conclusion:\*\*.*(?:\n{2,}|$)', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(", revealing the tool's version for this context", '.', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'exit condition \(`break`, `return`,.*?\)', 'exit condition (like `break` or `return`)', esbmc_output, flags=re.MULTILINE) #keeps the message concise, displays only the relevant information + + esbmc_output = re.sub(r'.*Slicing.*\.|.*slicing.*\.', '', esbmc_output, flags=re.MULTILINE) #Slicing information doesn't help for understanding bugs or approach them differently + esbmc_output = re.sub(r'The output contains.*are not relevant.*\n', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'such as the decision procedure used (`ESBMC-boolector`) and', 'such as', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'These details are not directly related to understanding the vulnerabilities in the code.*|\.\s*.*are not direclty related to understanding the vulnerabilities.*|\.\s*.*are not relevant to understanding the vulnerabilities.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\s*\(\s*`thread\d+`(?:,\s*`thread\d+`)*\s*\)', '', esbmc_output) #It is not neccesary to mention threads in parantheses, as they are all explained afterwards + esbmc_output = re.sub(r'.*(`ESBMC-boolector`).*', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'^\s*\n', '', esbmc_output) + esbmc_output = re.sub(r'\n{2,}', '\n', esbmc_output) + esbmc_output = re.sub(r': The version of ESBMC.*', '', esbmc_output, flags=re.MULTILINE) + + esbmc_output = re.sub(r'\*\*"No bug has been found in the base case"\*\*:.*', '**"No bug has been found in the base case"**', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r' This suggests that the system modeled by the program, under the constraints and assertions defined, behaves as expected without leading to any undesired or erroneous states within the bounded context.', '', esbmc_output, flags=re.MULTILINE) #The output is more concise without this sentence, and all relevant aspects are still provided. + return esbmc_output -# def remove_runtime_decision_procedure(text): -# # Tokenize the text into sentences -# sentences = sent_tokenize(text) +def remove_patterns_nltk(text): + # Tokenize the text into sentences + sentences = text.split('\n') -# # Define the pattern to be removed -# pattern = "Runtime decision procedure:" -# pattern2 = "BMC program time:" + # Define the pattern to be removed + pattern = "Runtime decision procedure:" + pattern2 = "BMC program time:" + thread_pattern = re.compile(r'\(`thread\d+`(?:,\s*`thread\d+`)*\)\s', re.IGNORECASE) + warning_pattern = re.compile(r'- \*\*Warnings\*\*:.*', re.IGNORECASE) + warning_pattern2 = re.compile(r'A series of wanings are issued', re.IGNORECASE) + conclusion_pattern = re.compile(r'### In Conclusion ###.*', re.IGNORECASE) + bmc_time = re.compile(r'BMC program time:\d+\.\d+s') + runtime_pattern =re.compile(r'Runtime decision procedure:\d+\.\d+s') -# # Iterate over each sentence -# for i in range(len(sentences)): -# # Tokenize the sentence into words -# words = word_tokenize(sentences[i]) + # Iterate over each sentence + for i in range(len(sentences)): + # Tokenize the sentence into words + words = word_tokenize(sentences[i]) -# # If the pattern is in the sentence, remove it -# if pattern in words: -# # Find the index of the pattern -# index = words.index(pattern) + # If the pattern is in the sentence, remove it + if pattern in words: + # Find the index of the pattern + index = words.index(pattern) -# # Remove the pattern and the next two words (the time and 's') -# if len(words) > index + 2 and words[index + 2] == 's': -# del words[index:index+3] + # Remove the pattern and the next two words (the time and 's') + if len(words) > index + 2 and words[index + 2] == 's': + del words[index:index+3] -# # Join the words back into a sentence -# sentences[i] = ' '.join(words) -# if pattern2 in words: -# index = words.index(pattern2) - -# if len(words) > index+2 and words[index+2] == 's': -# del words[index:index+3] -# sentences[i] = ' '.join(words) + # Join the words back into a sentence + sentences[i] = ' '.join(words) + if pattern2 in words: + index = words.index(pattern2) + + if len(words) > index+2 and words[index+2] == 's': + del words[index:index+3] + sentences[i] = ' '.join(words) + + # Remove thread patterns + cleaned_sentences = [thread_pattern.sub('', sentence) for sentence in sentences] + cleaned_sentences = [warning_pattern.sub('', sentence) for sentence in cleaned_sentences] + cleaned_sentences = [warning_pattern2.sub('', sentence) for sentence in cleaned_sentences] + cleaned_sentences = [conclusion_pattern.sub('', sentence) for sentence in cleaned_sentences] + cleaned_sentences = [bmc_time.sub('', sentence) for sentence in cleaned_sentences] + cleaned_sentences = [runtime_pattern.sub('', sentence) for sentence in cleaned_sentences] + + cleaned_sentences = [line for line in cleaned_sentences if line.strip() !=''] -# # Join the sentences back into a text -# modified_text = ' '.join(sentences) + # Join the sentences back into a text + modified_text = '\n'.join(cleaned_sentences) -# return modified_text + return modified_text -# text = "This is a test sentence. Runtime decision procedure: 1.387s I want this part to remain." -# print(remove_runtime_decision_procedure(text)) def esbmc_load_source_code( file_path: str, diff --git a/tests/test_output_reducer.py b/tests/test_output_reducer.py new file mode 100644 index 0000000..7a2c6f5 --- /dev/null +++ b/tests/test_output_reducer.py @@ -0,0 +1,174 @@ +import re +import sys +import nltk +import unittest +from nltk.tokenize import word_tokenize,sent_tokenize +print(sys.path) + +from esbmc_ai import esbmc_util +nltk.download('punkt') + +class TestEsbmcOutputOptimisation(unittest.TestCase): + + def test_esbmc_output_optimisation(self): + self.maxDiff = None + input = '''Starting with the source code, we see that it includes some external functions and declares global variables. These variables are of type int and are initialized with non-deterministic values using the `__VERIFIER_nondet_int()` function. The `assume()` function is then used to specify assumptions about the values of these variables + +Now let's look at the output from ESBMC. It starts with information about the version of ESBMC being used. Then it mentions the target platform and the C file being parsed. It indicates that the program is being converted into a GOTO program, which is the intermediate representation used by ESBMC for verification. The GOTO program creation time and processing time are also mentioned. + +ESBMC then provides details about the symex (symbolic execution) process, including the time taken and the number of assignments. It mentions that slicing was performed, removing some assignments. + +After that, it mentions the number of verification condition checks (VCCs) generated and the number remaining after simplification. These VCCs represent the conditions that need to be checked for the program to be considered correct. +1a2b3c4d 5e6f7a8b 9c0d1e2f 3a4b5c6d +Line 123: ESBMC is using the Boolector solver 1.2.3 to solve the encoding. +This is the result, which takes 1.34s. +ESBMC then mentions the solver being used, which is Boolector in this case. It indicates the time taken for encoding the VCCs and the time taken for solving using the specified solver. +Overall, the output from ESBMC shows the analysis process, including parsing, conversion, verification condition generation, encoding, solving, and bug trace generation. +Finally, it mentions the runtime decision procedure and the overall time taken for the BMC (Bounded Model Checking) program. +In this case, ESBMC reports "VERIFICATION SUCCESSFUL" and mentions that a solution was found by the forward condition. This means that all states in the program are reachable. +Finally, ESBMC confirms that the verfication has been successful: "VERIFICATION SUCCESSFUL."''' + + input01 = '''1. The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver. +This output states that the solving process with the Boolector solver took a certain amount of time. +----------------------------------------- +var01 = 0 (00000000 00000000 00000000 00000000) +Line 1: ESBMC is using the Boolector solver 3.3.2 to solve the encoding. +Line 3: The solver takes 2.3s to determine the runtime decision procedure. +The time it took the Boolector solve was 3.24s +- `Slicing time: 2.3s (removed 3 assignments\)`: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis. +''' + + input02 = '''Now let's look at the output from ESBMC. It starts with information about the version of ESBMC being used. +virtual_table::std::ios@tag-std::iostream = { .~ios=&~ios } +foo = ( Foo *)(*dynamic_2_value) +The decision procedure involves executing various branches of the program symbolically and making decisions based on constraints and conditions. +Let's go through the source code and the output of ESBMC to understand what is happening.''' + + expected_output = '''Starting with the source code, we see that it includes some external functions and declares global variables. These variables are of type int and are initialized with nondeterministic values using the `__VERIFIER_nondet_int()` function. The `assume()` function is then used to specify assumptions about the values of these variables + +After that, it mentions the number of verification condition checks (VCCs) generated and the number remaining after simplification. +This is the result +In this case, ESBMC reports "VERIFICATION SUCCESSFUL" and mentions that a solution was found by the forward condition. This means that all states in the program are reachable. +Finally, ESBMC indicates a successful verification.''' + + expected_output01 =''' +var01 = 0 + + +''' + + expected_output02 ='''Now let's look at the ESBMC output. It starts with information about the version of ESBMC being used. + + + +''' + + + output_str = esbmc_util.esbmc_output_optimisation(input) + #print("The output is: ") + #print(output_str) + #print(expected_output) + + self.assertEqual(output_str, expected_output) + output_str1 = esbmc_util.esbmc_output_optimisation(input01) + self.assertEqual(output_str1, expected_output01) + output_str2 = esbmc_util.esbmc_output_optimisation(input02) + self.assertEqual(output_str2, expected_output02) + + + def test_reduce_output2(self): + input2 = '''These messages relate how ESBMC handles loops. +After 4 iterations, ESBMC found some bugs into the code. +the output of the Efficient SMT-Based Context-Bounded Model Checker (ESBMC) goes through some verifications and iterates 5 times to find a bug. +Multiple threads (`thread1`, `thread2`, `thread3`) perform operations on a shared variable `data` +The output contains various debug and diagnostic messages, which are not relevant to understanding the vulnerabilities in the code +The ESBMC will go thorugh some initial verifications before moving forward +Following this, it moves on to parsing the source file of the elevator system, which generally means it starts to interpret and prepare the program for model checking. +1. **Parsing and Conversion**: ESBMC starts by parsing the given C file and converting it into an internal representation. +2. **GOTO Program Creation**: The tool generates a GOTO program. GOTO programs abstract away complex control flow to simplify the analysis. This step took approximately 0.343 seconds, with post-processing taking an additional 0.028 seconds. +3. **Bounded Model Checking (BMC)**: BMC involves checking the truth of properties up to a certain bound. Here, `k = 1` indicates that the checking is performed up to depth 1. Initially, ESBMC is checking the base case to ensure no bugs are present. +#Parsing and Compilation Warnings: +''' + + input3 = '''3. **Bounded Model Checking (BMC)**: BMC involves checking the truth of properties up to a certain bound. Here, `k = 1` indicates that the checking is performed up to depth 1. Initially, ESBMC is checking the base case to ensure no bugs are present. +4. **Symex and Slicing**: Symbolic execution (Symex) is completed, and slicing removes irrelevant parts of the program to reduce complexity. For the base case, this reduced the assignments significantly. +5. **Verification Conditions (VCCs) Generation**: ESBMC generates VCCs, which are conditions that need to be satisfied to prove the correctness of the program. In the base case, only 1 VCC remains after simplification. +- **Warnings**: Several warnings are issued about the use of `printf` functions with non-literal format strings. This potentially insecure usage suggests replacing direct format specifiers with explicit string arguments to enhance security. +These messages relate how ESBMC handles loops. +6. **Solving**: ESBMC uses the Boolector solver to solve the generated conditions. The decision procedure's runtime and overall BMC program time are noted. +7. **Success in the Base Case**: ESBMC finds no bugs in the base case and proceeds to check the forward condition to ensure all states (up to `k = 1`) are reachable. +2. **Parsing**: ESBMC starts by parsing the provided C file (the file path indicates the file in question), preparing it for the model checking process. +- **"No bug has been found in the base case"**: Initially, ESBMC didn't find any errors under the base case scenario. This means that, at the starting point, without progressing further (`k = 1`), the program doesn't violate any of its assertions or fail to meet its specified conditions. +The BMC process starts again, and ESBMC reports that it was able to symbolically execute the program in 0.027 seconds.''' + + input4 = '''8. **Verification Successful**: The verification is successful, with the forward condition indicating that all states are reachable within the given bounds. +In this case, it initially starts with a simple base case, where k=1. +ESBMC displays various aspects for the code, such as the decision procedure used (`ESBMC-boolector`) and the number of switch points encountered during the analysis. +These details are not directly related to understanding the vulnerabilities in the code. +exit condition (`break`, `return`, or something else) +ESBMC starts with a base case for the program, revealing the tool's version for this context +The output from ESBMC (the Efficient SMT-based Context-Bounded Model Checker) pertaining to this code indicates a verification process: +- **"ESBMC version 7.5.0"**: The version of ESBMC used for verification is 7.5.0, denoting the tool's specific features and capabilities inherent to this version. +### Conclusion +- **"Solution found by the forward condition; all states are reachable (k = 1)"**: ESBMC concluded that, under the given conditions, every state it could consider within one step is reachable without uncovering any logical inconsistencies or errors. This suggests that the system modeled by the program, under the constraints and assertions defined, behaves as expected without leading to any undesired or erroneous states within the bounded context.''' + + expected_output2='''the output of ESBMC goes through some verifications and iterates 5 times to find a bug. +Multiple threads perform operations on a shared variable `data` +The ESBMC will go thorugh some initial verifications +2. **GOTO Program Creation**: The tool generates a GOTO program. GOTO programs abstract away complex control flow to simplify the analysis. This step took approximately 0.343 seconds, with post-processing taking an additional 0.028 seconds. +3. **Bounded Model Checking (BMC)**: BMC involves checking the truth of properties up to a certain bound. Here, `k = 1` indicates that the checking is performed up to depth 1. Initially, ESBMC is checking the base case to ensure no bugs are present. +''' + expected_output3 ='''3. **Bounded Model Checking (BMC)**: BMC involves checking the truth of properties up to a certain bound. Here, `k = 1` indicates that the checking is performed up to depth 1. Initially, ESBMC is checking the base case to ensure no bugs are present. +5. **Verification Conditions (VCCs) Generation**: ESBMC generates VCCs, which are conditions that need to be satisfied to prove the correctness of the program. In the base case, only 1 VCC remains after simplification. +6. **Solving**: ESBMC uses the Boolector solver to solve the generated conditions. The decision procedure's runtime and overall BMC program time are noted. +7. **Success in the Base Case**: ESBMC finds no bugs in the base case and proceeds to check the forward condition to ensure all states (up to `k = 1`) are reachable. +- **"No bug has been found in the base case"**''' + + expected_output4 ='''8. **Verification Successful**: The verification is successful, with the forward condition indicating that all states are reachable within the given bounds. +It starts with a simple base case, where k=1. +exit condition (like `break` or `return`) +ESBMC starts with a base case for the program. +The output from ESBMC pertaining to this code indicates a verification process: +- **"ESBMC version 7.5.0"** +- **"Solution found by the forward condition; all states are reachable (k = 1)"**: ESBMC concluded that, under the given conditions, every state it could consider within one step is reachable without uncovering any logical inconsistencies or errors.''' + self.maxDiff = None + output_str2 = esbmc_util.reduce_output2(input2) + + #print('Filtered output2:') + #print(output_str2) + #print('Expected output2: ') + self.assertEqual(output_str2, expected_output2) + output_str3 = esbmc_util.reduce_output2(input3) + self.assertEqual(output_str3, expected_output3) + #print('Filtered output 3') + #print(output_str3) + #print('Expected output3:') + #print(expected_output3) + output_str4 = esbmc_util.reduce_output2(input4) + self.assertEqual(output_str4, expected_output4) + #print('Filtered output 4') + #print(output_str4) + # print('Expected output4:') + # print(expected_output4) + + def test_remove_patterns_nltk(self): + + input5 = '''Runtime decision procedure:1.223s This part should remain. +BMC program time:2.332s +Multiple threads (`thread1`, `thread2`, `thread3`) perform operations on a shared variable `data` +- **Warnings**: Several warnings are issued about the use of `printf` functions with non-literal format strings. +A series of wanings are issued +### In Conclusion ### +The ESBMC output is shrinked.''' + expected_output =''' This part should remain. +Multiple threads perform operations on a shared variable `data` +The ESBMC output is shrinked.''' + + output = esbmc_util.remove_patterns_nltk(input5) + self.assertEqual(output, expected_output) + # print('\n'+output) + # print(expected_output) + +if __name__ == '__main__': + unittest.main() +print(sys.path) From c7ded6feb75d3eab6b1c069cb5e929d5ba904fae Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Tue, 26 Mar 2024 12:07:03 +0200 Subject: [PATCH 5/6] Added test for the GPT-4-turbo-preview AI model --- DOCUMENTATION.md | 5 ----- tests/test_ai_models.py | 2 ++ 2 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 DOCUMENTATION.md diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md deleted file mode 100644 index 5e3908c..0000000 --- a/DOCUMENTATION.md +++ /dev/null @@ -1,5 +0,0 @@ -# Contribution - -I created three functions for reducing the output of ESBMC-AI by using regex patterns and the nltk library. Thes functions are created in the esbmc_ai/esbmc_util.py file, with the names esbmc_ouput_optimisation(), reduce_output2() and remove_patterns_nltk() and called in the esbmc() function. They work successfully on reducing the ESBMC-AI output with the line patterns that are created inside them. All of the patterns have been tested on varios ESBMC-Ai outputs and they work well. Besides that, I created more output scenarious in the config.json file: null pointer, arthmetic overflow, buffer overflow, struct-fileds check, NaN check, array out-of-bounds, division-by-zero - -To run the test file that I created, go to the esbmc_ai_fork/esbmc-ai/ folder and run this command python -m tests.test_output_reducer \ No newline at end of file diff --git a/tests/test_ai_models.py b/tests/test_ai_models.py index e9bdba6..c914231 100644 --- a/tests/test_ai_models.py +++ b/tests/test_ai_models.py @@ -22,8 +22,10 @@ def test_is_valid_ai_model() -> None: assert is_valid_ai_model(AIModels.FALCON_7B.value) assert is_valid_ai_model(AIModels.GPT_3_16K.value) + assert is_valid_ai_model(AIModels.GPT_4_TURBO_PREVIEW.value) assert is_valid_ai_model("gpt-3.5-turbo") assert is_valid_ai_model("falcon-7b") + assert is_valid_ai_model("gpt-4-turbo-preview") def test_is_not_valid_ai_model() -> None: From 36798da8b5a00246f8507a732e44234b0ad34ca4 Mon Sep 17 00:00:00 2001 From: "mihai.state" Date: Thu, 27 Jun 2024 19:48:33 +0100 Subject: [PATCH 6/6] Optimized the regex pattens to be more time efficient Removed the 'flags=re.MULTILINE' argument for the regex patterns that work properly without it Used the re.compile method for some patterns to reduce time complexity Signed-off-by: mihai.state --- config.json | 6 +- esbmc_ai/esbmc_util.py | 182 +++++++++------- path/to/your/virtualenv/bin/Activate.ps1 | 247 ---------------------- path/to/your/virtualenv/bin/activate | 69 ------ path/to/your/virtualenv/bin/activate.csh | 26 --- path/to/your/virtualenv/bin/activate.fish | 66 ------ path/to/your/virtualenv/bin/pip | 8 - path/to/your/virtualenv/bin/pip3 | 8 - path/to/your/virtualenv/bin/pip3.10 | 8 - path/to/your/virtualenv/bin/pip3.11 | 8 - path/to/your/virtualenv/bin/python | 1 - path/to/your/virtualenv/bin/python3 | 1 - path/to/your/virtualenv/bin/python3.11 | 1 - path/to/your/virtualenv/lib64 | 1 - path/to/your/virtualenv/pyvenv.cfg | 5 - samples/branches.c | 8 + tests/test_output_reducer.py | 21 +- 17 files changed, 118 insertions(+), 548 deletions(-) delete mode 100644 path/to/your/virtualenv/bin/Activate.ps1 delete mode 100644 path/to/your/virtualenv/bin/activate delete mode 100644 path/to/your/virtualenv/bin/activate.csh delete mode 100644 path/to/your/virtualenv/bin/activate.fish delete mode 100755 path/to/your/virtualenv/bin/pip delete mode 100755 path/to/your/virtualenv/bin/pip3 delete mode 100755 path/to/your/virtualenv/bin/pip3.10 delete mode 100755 path/to/your/virtualenv/bin/pip3.11 delete mode 120000 path/to/your/virtualenv/bin/python delete mode 120000 path/to/your/virtualenv/bin/python3 delete mode 120000 path/to/your/virtualenv/bin/python3.11 delete mode 120000 path/to/your/virtualenv/lib64 delete mode 100644 path/to/your/virtualenv/pyvenv.cfg create mode 100644 samples/branches.c diff --git a/config.json b/config.json index a29176c..e684c02 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "ai_model": "gpt-4-turbo-preview", + "ai_model": "gpt-3.5-turbo-16k", "ai_custom": {}, "esbmc_path": "~/ESBMC/bin/esbmc", "allow_successful": true, @@ -71,7 +71,7 @@ "system":[ { "role": "System", - "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a null pointer issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a null pointer might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the null pointer. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles mull pointers effectively.\nGuidelines: Always initialize pointers: Uninitialized pointers may cause a segmentation fault or can point to some arbitrary memory location. Use null pointers to represent 'no object' or 'unknown'. Free memory properly: Once the memory pointed by a pointer is freed, set the pointer to null. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the null pointer vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a null pointer issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a null pointer might occur. The solution should prevent unexpected behaviour, crashes or vulnerabilites due to the null pointer. \nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids undefined behaviour and handles null pointers effectively.\nGuidelines: Always initialize pointers: Uninitialized pointers may cause a segmentation fault or can point to some arbitrary memory location. Use null pointers to represent 'no object' or 'unknown'. Free memory properly: Once the memory pointed by a pointer is freed, set the pointer to null. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the null pointer vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." }, { "role": "AI", @@ -171,7 +171,7 @@ "system":[ { "role": "System", - "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a memory leak issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a deadlock might occur. The solution should prevent unexpected behaviour, chrases, security vulnerabilities, performance issues and maintainability issues.\nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids data races and can handle them effectively in the same time.\nGuidelines: Implement methods that do proper resource allocation to prevent circular wait conditions, create a deadlock detection mechanism, so the system can take steps to recover from it, or, alternatively, use the Ostrich Algorithm, which ignores the problem on the assumption that it occurs rarely, and, if it happens, the system is rebooted. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the deadlock vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." + "content": "Here's a C program with a vulnerability:\n```c\n{source_code}\n```\nA Formal Verification tool identified a memory leak issue:\n{esbmc_output}\nTask: Modify the C code to safely handle scenarios where a deadlock might occur. The solution should prevent unexpected behaviour, crashes, security vulnerabilities, performance issues and maintainability issues.\nGuidelines: Focus on making essential changes only. Avoid adding or modifying comments, and ensure the changes are precise and minimal.\nGuidelines: Ensure the revised code avoids data races and can handle them effectively in the same time.\nGuidelines: Implement methods that do proper resource allocation to prevent circular wait conditions, create a deadlock detection mechanism, so the system can take steps to recover from it, or, alternatively, use the Ostrich Algorithm, which ignores the problem on the assumption that it occurs rarely, and, if it happens, the system is rebooted. Output: Provide the corrected, verified C code. The solution should compile and run error-free, addressing the deadlock vulnerability.\nStart the code snippet with```c and end with ```. Reply OK if you understand." }, { "role": "AI", diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index ef90a07..aae4269 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -35,69 +35,73 @@ def esbmc(path: str, esbmc_params: list): def esbmc_output_optimisation(esbmc_output:str) -> str: - esbmc_output =re.sub(r'(^\d+\.\s*)?The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'(^\d+\.\s*)?This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'(^\d+\.\s*)?The output mentions that no solver was specified, so ESBMC defaults to using the Boolector solver\.', '', esbmc_output) + esbmc_output = re.sub(r'(^\d+\.\s*)?This output states that the solving process with the Boolector solver took a certain amount of time\.','', esbmc_output) esbmc_output = re.sub(r'[-]+', '', esbmc_output) # Remove lines of dashes esbmc_output = re.sub(r'\(*\b[0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8} [0-9a-fA-F]{8}\b\)*', '', esbmc_output) # Remove hex patterns - esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) # Remove Boolector lines - esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^Line \d+: ESBMC is using the Boolector solver \d+\.\d+\.\d+ to solve the encoding\.$', '', esbmc_output) + # Remove Boolector lines + esbmc_output = re.sub(r'\d+. ESBMC is using the Boolector solver \d+\. \d+\. \d+ to solve the encoding\.$', '', esbmc_output) - esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output, flags=re.MULTILINE) # Remove solver time lines - esbmc_output = re.sub(r'.*Boolector.*\n\n', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'^Line \d+: The solver takes \d+\.\d+s to determine the runtime decision procedure\.$', '', esbmc_output) + # Remove solver time lines + esbmc_output = re.sub(r'.*Boolector.*\n\n', '', esbmc_output) pattern = r"- `.*Slicing time: \d+\.\d+s \(removed \d+ assignments\)`.*: ESBMC has performed slicing, a technique that removes irrelevant assignments from the program and reduces the complexity of analysis." - esbmc_output = re.sub(pattern, '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(pattern, '', esbmc_output) esbmc_output = re.sub(r'\n\s*\n', '\n', esbmc_output) esbmc_output = esbmc_output.replace("output from ESBMC", "ESBMC output") - esbmc_output = re.sub(r'The time.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'The time.*', '', esbmc_output) - esbmc_output = re.sub(r'[^.]VCCs represent.*|[^.]These VCCs represent.*', '', esbmc_output, flags=re.MULTILINE) #These types of lines represent some sequences of VCCs that can easily be ommitted for development purposes - esbmc_output = re.sub(r'.*bitvector.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'[^.]VCCs represent.*|[^.]These VCCs represent.*', '', esbmc_output) + #These types of lines represent some sequences of VCCs that can easily be ommitted for development purposes + esbmc_output = re.sub(r'.*bitvector.*', '', esbmc_output) esbmc_output = re.sub(r'\s*\(\s*`thread\d+`(?:,\s*`thread\d+`)*\s*\)', '', esbmc_output) - esbmc_output = re.sub(r'.*floating-point arithmetic.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*encoding.*', '', esbmc_output, flags = re.MULTILINE) - esbmc_output = re.sub(r'.*encoding time.*', '', esbmc_output, flags= re.MULTILINE) + esbmc_output = re.sub(r'.*floating-point arithmetic.*', '', esbmc_output) + esbmc_output = re.sub(r'.*encoding.*', '', esbmc_output) + esbmc_output = re.sub(r'.*encoding time.*', '', esbmc_output) - esbmc_output = re.sub(r', which takes \d+\.\d+s\.*', '.', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'The output indicates that ESBMC generates a verification condition \(VCC\) for each possible path and encodes them into a solver\.', '', esbmc_output, flags= re.MULTILINE) #This information can be disregarded for development purposes - esbmc_output = re.sub(r'Measures the [^.]+?encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r', which takes \d+\.\d+s\.*', '.', esbmc_output) + esbmc_output = re.sub(r'The output indicates that ESBMC generates a verification condition \(VCC\) for each possible path and encodes them into a solver\.', '', esbmc_output) #This information can be disregarded for development purposes + esbmc_output = re.sub(r'Measures the [^.]+?encoding time [^.]+?\.', '', esbmc_output) - esbmc_output = re.sub(r'In this case, the procedure took [0-9.]+ seconds\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\s*It measures[^.]+? encoding time [^.]+?\.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'In this case, the procedure took [0-9.]+ seconds\.', '', esbmc_output) + esbmc_output = re.sub(r'\s*It measures[^.]+? encoding time [^.]+?\.', '', esbmc_output) esbmc_output = re.sub(r'[^.]*GOTO program[^.]*\.', '', esbmc_output) - esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\.\s*.*time.*\n\n|\.\s*[^.]*time[^.]*\.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*program time: \d+\.\d+s \.', '',esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\d+\.\d+s \.', '', esbmc_output) + esbmc_output = re.sub(r'\.\s*.*time.*\n\n|\.\s*[^.]*time[^.]*\.', '', esbmc_output) + esbmc_output = re.sub(r'.*program time: \d+\.\d+s \.', '',esbmc_output) - esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time:\d+\.\d+s', '', esbmc_output, flags= re.MULTILINE) - esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s\"*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*Symex.*|.*symex.*|.*symbolic execution.*\n\n', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\.\s*[^.]*?BMC program time:\d+\.\d+s', '', esbmc_output) + esbmc_output =re.sub(r'\.\s*[^.]*?Runtime decision procedure: \d+.\d+s\"*', '', esbmc_output) + esbmc_output = re.sub(r'.*Symex.*|.*symex.*|.*symbolic execution.*\n\n', '', esbmc_output) - esbmc_output =re.sub(r'After[^.]*?, the code', 'The program then', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*assignments.*|.*assignment.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*iteratively.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output =re.sub(r'After[^.]*?, the code', 'The program then', esbmc_output) + esbmc_output = re.sub(r'.*assignments.*|.*assignment.*', '', esbmc_output) + esbmc_output = re.sub(r'.*This is done iteratively for all the assigned variables.', '', esbmc_output) - esbmc_output = re.sub(r'.*branches.*', '', esbmc_output, flags=re.MULTILINE) #The informations about branches are not necessary relevant to the understanding of bugs - esbmc_output = re.sub(r'.*analysis process.*\s','', esbmc_output, flags=re.MULTILINE ) + esbmc_output = re.sub(r'The decision procedure involves executing various branches of the program symbolically and making decisions based on constraints and conditions.', '', esbmc_output) + #The informations about branches are not necessary relevant to the understanding of bugs + esbmc_output = re.sub(r'.*analysis process.*\s','', esbmc_output) esbmc_output = re.sub(r'.*remaining VCC.*\s|.*remaining VCCs.*\s','', esbmc_output) - esbmc_output = re.sub(r'.*0 VCCs.*', '', esbmc_output, flags= re.MULTILINE) # If there are no more VCCs left, the user is not going to take action in this regard. - esbmc_output = re.sub(r'Finally, ESBMC confirms that the verfication.*"VERIFICATION SUCCESSFUL."', 'Finally, ESBMC indicates a successful verification.', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*0 VCCs.*', '', esbmc_output) # If there are no more VCCs left, the user is not going to take action in this regard. + esbmc_output = re.sub(r'Finally, ESBMC confirms that the verfication.*"VERIFICATION SUCCESSFUL."', 'Finally, ESBMC indicates a successful verification.', esbmc_output) + esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output) - esbmc_output = re.sub(r'\.\s*.*"Symex completed in:.*".*\s', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*These warnings are about.*', '', esbmc_output, flags=re.MULTILINE) #Warning information are not useful for debugging purposes + esbmc_output = re.sub(r'\.\s*.*"Symex completed in:.*".*\s', '', esbmc_output) + esbmc_output = re.sub(r'.*These warnings are about.*', '', esbmc_output) #Warning information are not useful for debugging purposes - esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'Let\'s[^.]the[^.]code and explain the relevant parts of the output from ESBMC.', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'virtual_table::std::.*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'Let\'s go through the source code and the output of ESBMC to understand what is happening.|Let\'s go through the code and explain the relevant parts along with the output from ESBMC.', '', esbmc_output) + esbmc_output = re.sub(r'Let\'s[^.]the[^.]code and explain the relevant parts of the output from ESBMC.', '', esbmc_output) + esbmc_output = re.sub(r'virtual_table::std::.*', '', esbmc_output) - esbmc_output = re.sub(r'foo =.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'the .*Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'(Efficient SMT-Based Bounded Model Checker)', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'foo =.*', '', esbmc_output) + esbmc_output = re.sub(r'the .*Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output) + esbmc_output = re.sub(r'(Efficient SMT-Based Bounded Model Checker)', '', esbmc_output) return esbmc_output @@ -105,57 +109,80 @@ def esbmc_output_optimisation(esbmc_output:str) -> str: def reduce_output2(esbmc_output: str) -> str: - esbmc_output = re.sub(r'\.\s*[^.]*symbolically execute the program[^.]*\.', '', esbmc_output, flags=re.MULTILINE) #Reffers to a time measurement - esbmc_output = re.sub('In this case, it initially starts with', 'It starts with', esbmc_output, flags= re.MULTILINE) - esbmc_output = re.sub(r'(.*)\s*before moving forward', r'\1', esbmc_output, flags=re.MULTILINE) #removes the specified sequence, as it is trivial + pattern1 = re.compile(r'\.\s*[^.]*symbolically execute the program[^.]*\.') + esbmc_output = pattern1.sub('', esbmc_output) #Reffers to a time measurement + + esbmc_output = esbmc_output.replace('In this case, it initially starts with', 'It starts with') + esbmc_output = re.sub(r'(.*)\s*before moving forward', r'\1', esbmc_output) #removes the specified sequence, as it is trivial - esbmc_output = re.sub(r'.*k=0.*', '', esbmc_output, flags=re.MULTILINE) # for k=0, which is the base case, there is no need for an explanation for development purposes, the user would not find this information helpful - esbmc_output = re.sub(r'the Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r', which stands for Efficient SMT-Based Countext-Bounded Model Checker', '.', esbmc_output, flags=re.MULTILINE ) + pattern3 = re.compile('.*k=0.*') # for k=0, which is the base case, there is no need for an explanation for development purposes, the user would not find this information helpful + esbmc_output = pattern3.sub('', esbmc_output) + esbmc_output = re.sub('the Efficient SMT-Based Context-Bounded Model Checker \(ESBMC\)', 'ESBMC', esbmc_output) + + esbmc_output = esbmc_output.replace(', which stands for Efficient SMT-Based Context-Bounded Model Checker', '.') + pattern4 = re.compile('.*Warnings.*|.*warnings.*') + esbmc_output = pattern4.sub('', esbmc_output) - esbmc_output = re.sub(r'.*Warnings.*|.*warnings.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*\*\*Warnings\*\*.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'.*warning.*', '', esbmc_output, flags=re.MULTILINE) + pattern5 = re.compile('.*\*Warnings\*\*.*') + esbmc_output = pattern5.sub('', esbmc_output) + + pattern6 = re.compile('.*warning.*') + esbmc_output = pattern6.sub('', esbmc_output) - esbmc_output = re.sub('.*A series of warnings.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'ESBMC \(the Efficient SMT-based Context-Bounded Model Checker\)', 'ESBMC', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'These messages relate how ESBMC handles loops.*\.|These messages relate.*\.', '', esbmc_output, flags=re.MULTILINE) #These type of lines can be ommitted, if there is anything relevant about loops, ESBMC-AI can display other more precise explanations + pattern7 = re.compile('.*A series of warnings.*') + esbmc_output = pattern7.sub('', esbmc_output) + + esbmc_output = re.sub('ESBMC \(the Efficient SMT-based Context-Bounded Model Checker\)', 'ESBMC', esbmc_output) + pattern8 = re.compile('These messages relate how ESBMC handles loops.*\.|These messages relate.*\.') #These type of lines can be ommitted, if there is anything relevant about loops, ESBMC-AI can display other more precise explanations + esbmc_output = pattern8.sub('', esbmc_output) - esbmc_output = re.sub(r'which, in simple terms,', 'which', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'After[^.]*iterations[^.]*\.', '', esbmc_output, flags=re.MULTILINE) #These types of line can be removed to make the output easier to read, without ommitting any important information - esbmc_output = re.sub(r'[^.]*iterations[^.]*\.', '.', esbmc_output, flags=re.MULTILINE) + esbmc_output= esbmc_output.replace('which, in simple terms', 'which') + esbmc_output = re.sub(r'After[^.]*iterations[^.]*\.', '', esbmc_output) #These types of line can be removed to make the output easier to read, without ommitting any important information - esbmc_output = re.sub(r'Initially, ESBMC parses the given source file. It identifies', 'ESBMC identifies', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\.\s*.*parsing the source file.*|\.\s*.parses the source file.*|\s*.*parsing the source file.*|\s*.*starts by parsing the program.*|\s*.*starts by parsing the.*', '', esbmc_output, flags=re.MULTILINE) # this is an additional information that ESBMC-AI provides, it is not useful for understanding the behaviour of the tested code - esbmc_output = re.sub(r'\*\*Parsing and Compilation Warnings:\*\*', '', esbmc_output, flags=re.MULTILINE) #This represents a type of header in the output, which is followed by an explanatory paragraph, its removal doesn't affect the understandability of the parsing and warning explanations + + esbmc_output = esbmc_output.replace('Initially, ESBMC parses the given source file. It identifies', 'ESBMC identifies') + pattern11 = re.compile('\.\s*.parsing the source file.*|\.\s*.parses the source file.*|\s*.*parsing the source file.*|\s*.*starts by parsing the program.*|\s*.*starts by parsing the.*') # this is an additional information that ESBMC-AI provides, it is not useful for understanding the behaviour of the tested code + + esbmc_output = pattern11.sub('', esbmc_output) + pattern12 = re.compile('\*\*Parsing and Compilation Warnings:\*\*') #This represents a type of header in the output, which is followed by an explanatory paragraph, its removal doesn't affect the understandability of the parsing and warning explanations + esbmc_output = pattern12.sub('', esbmc_output) esbmc_output = re.sub(r'### Conclusion.*(?:\n{2,}|$)|### In Conclusion.*(?:\n{2,}|$)', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'\*\*Conclusion:\*\*.*(?:\n{2,}|$)', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\*\*Conclusion:\*\*.*(?:\n{2,}|$)', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(", revealing the tool's version for this context", '.', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'exit condition \(`break`, `return`,.*?\)', 'exit condition (like `break` or `return`)', esbmc_output, flags=re.MULTILINE) #keeps the message concise, displays only the relevant information + esbmc_output = esbmc_output.replace(", revealing the tool's version for this context", '.') + esbmc_output = re.sub(r'exit condition \(`break`, `return`,.*?\)', 'exit condition (like `break` or `return`)', esbmc_output) #keeps the message concise, displays only the relevant information + - esbmc_output = re.sub(r'.*Slicing.*\.|.*slicing.*\.', '', esbmc_output, flags=re.MULTILINE) #Slicing information doesn't help for understanding bugs or approach them differently - esbmc_output = re.sub(r'The output contains.*are not relevant.*\n', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'such as the decision procedure used (`ESBMC-boolector`) and', 'such as', esbmc_output, flags=re.MULTILINE) + esbmc_output = re.sub(r'.*Slicing.*\.|.*slicing.*\.', '', esbmc_output) #Slicing information doesn't help for understanding bugs or approach them differently + pattern16 = re.compile('The output contains.*are not relevant.*\n') + + esbmc_output = pattern16.sub('', esbmc_output) + esbmc_output = re.sub(r'.*(`ESBMC-boolector`).*', '', esbmc_output) - esbmc_output = re.sub(r'These details are not directly related to understanding the vulnerabilities in the code.*|\.\s*.*are not direclty related to understanding the vulnerabilities.*|\.\s*.*are not relevant to understanding the vulnerabilities.*', '', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r'\s*\(\s*`thread\d+`(?:,\s*`thread\d+`)*\s*\)', '', esbmc_output) #It is not neccesary to mention threads in parantheses, as they are all explained afterwards - esbmc_output = re.sub(r'.*(`ESBMC-boolector`).*', '', esbmc_output, flags=re.MULTILINE) + esbmc_output = esbmc_output.replace('such as the decision procedure used (`ESBMC-boolector`) and', 'such as') + pattern16 = re.compile('These details are not directly related to understanding the vulnerabilities in the code.*|\.\s*.*are not direclty related to understanding the vulnerabilities.*|\.\s*.*are not relevant to understanding the vulnerabilities.*') + + esbmc_output = pattern16.sub('', esbmc_output) + pattern17 = re.compile('\s*\(\s*`thread\d+`(?:,\s*`thread\d+`)*\s*\)') #It is not neccesary to mention threads in parantheses, as they are all explained afterwards + esbmc_output = pattern17.sub('', esbmc_output) esbmc_output = re.sub(r'^\s*\n', '', esbmc_output) + pattern19 = re.compile('^\s*\n') + esbmc_output = pattern19.sub('', esbmc_output) + esbmc_output = re.sub(r'\n{2,}', '\n', esbmc_output) - esbmc_output = re.sub(r': The version of ESBMC.*', '', esbmc_output, flags=re.MULTILINE) + pattern20 = re.compile(': The version of ESBMC.*') + esbmc_output = pattern20.sub('', esbmc_output) - esbmc_output = re.sub(r'\*\*"No bug has been found in the base case"\*\*:.*', '**"No bug has been found in the base case"**', esbmc_output, flags=re.MULTILINE) - esbmc_output = re.sub(r' This suggests that the system modeled by the program, under the constraints and assertions defined, behaves as expected without leading to any undesired or erroneous states within the bounded context.', '', esbmc_output, flags=re.MULTILINE) #The output is more concise without this sentence, and all relevant aspects are still provided. + esbmc_output = re.sub(r'\*\*"No bug has been found in the base case"\*\*:.*', '**"No bug has been found in the base case"**', esbmc_output) + esbmc_output = esbmc_output.replace(r' This suggests that the system modeled by the program, under the constraints and assertions defined, behaves as expected without leading to any undesired or erroneous states within the bounded context.', '') #The output is more concise without this sentence, and all relevant aspects are still provided. return esbmc_output def remove_patterns_nltk(text): # Tokenize the text into sentences sentences = text.split('\n') - # Define the pattern to be removed pattern = "Runtime decision procedure:" pattern2 = "BMC program time:" @@ -165,40 +192,35 @@ def remove_patterns_nltk(text): conclusion_pattern = re.compile(r'### In Conclusion ###.*', re.IGNORECASE) bmc_time = re.compile(r'BMC program time:\d+\.\d+s') runtime_pattern =re.compile(r'Runtime decision procedure:\d+\.\d+s') - + boolector_solver = re.compile(r'.*Boolector solver.*') # Iterate over each sentence for i in range(len(sentences)): # Tokenize the sentence into words words = word_tokenize(sentences[i]) - # If the pattern is in the sentence, remove it if pattern in words: # Find the index of the pattern index = words.index(pattern) - # Remove the pattern and the next two words (the time and 's') if len(words) > index + 2 and words[index + 2] == 's': del words[index:index+3] - # Join the words back into a sentence sentences[i] = ' '.join(words) if pattern2 in words: index = words.index(pattern2) - if len(words) > index+2 and words[index+2] == 's': del words[index:index+3] sentences[i] = ' '.join(words) - # Remove thread patterns cleaned_sentences = [thread_pattern.sub('', sentence) for sentence in sentences] cleaned_sentences = [warning_pattern.sub('', sentence) for sentence in cleaned_sentences] cleaned_sentences = [warning_pattern2.sub('', sentence) for sentence in cleaned_sentences] cleaned_sentences = [conclusion_pattern.sub('', sentence) for sentence in cleaned_sentences] cleaned_sentences = [bmc_time.sub('', sentence) for sentence in cleaned_sentences] - cleaned_sentences = [runtime_pattern.sub('', sentence) for sentence in cleaned_sentences] - + cleaned_sentences = [runtime_pattern.sub('', sentence) for sentence in cleaned_sentences] + cleaned_sentences = [boolector_solver.sub('', sentence) for sentence in cleaned_sentences] cleaned_sentences = [line for line in cleaned_sentences if line.strip() !=''] - + # Join the sentences back into a text modified_text = '\n'.join(cleaned_sentences) diff --git a/path/to/your/virtualenv/bin/Activate.ps1 b/path/to/your/virtualenv/bin/Activate.ps1 deleted file mode 100644 index b49d77b..0000000 --- a/path/to/your/virtualenv/bin/Activate.ps1 +++ /dev/null @@ -1,247 +0,0 @@ -<# -.Synopsis -Activate a Python virtual environment for the current PowerShell session. - -.Description -Pushes the python executable for a virtual environment to the front of the -$Env:PATH environment variable and sets the prompt to signify that you are -in a Python virtual environment. Makes use of the command line switches as -well as the `pyvenv.cfg` file values present in the virtual environment. - -.Parameter VenvDir -Path to the directory that contains the virtual environment to activate. The -default value for this is the parent of the directory that the Activate.ps1 -script is located within. - -.Parameter Prompt -The prompt prefix to display when this virtual environment is activated. By -default, this prompt is the name of the virtual environment folder (VenvDir) -surrounded by parentheses and followed by a single space (ie. '(.venv) '). - -.Example -Activate.ps1 -Activates the Python virtual environment that contains the Activate.ps1 script. - -.Example -Activate.ps1 -Verbose -Activates the Python virtual environment that contains the Activate.ps1 script, -and shows extra information about the activation as it executes. - -.Example -Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv -Activates the Python virtual environment located in the specified location. - -.Example -Activate.ps1 -Prompt "MyPython" -Activates the Python virtual environment that contains the Activate.ps1 script, -and prefixes the current prompt with the specified string (surrounded in -parentheses) while the virtual environment is active. - -.Notes -On Windows, it may be required to enable this Activate.ps1 script by setting the -execution policy for the user. You can do this by issuing the following PowerShell -command: - -PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser - -For more information on Execution Policies: -https://go.microsoft.com/fwlink/?LinkID=135170 - -#> -Param( - [Parameter(Mandatory = $false)] - [String] - $VenvDir, - [Parameter(Mandatory = $false)] - [String] - $Prompt -) - -<# Function declarations --------------------------------------------------- #> - -<# -.Synopsis -Remove all shell session elements added by the Activate script, including the -addition of the virtual environment's Python executable from the beginning of -the PATH variable. - -.Parameter NonDestructive -If present, do not remove this function from the global namespace for the -session. - -#> -function global:deactivate ([switch]$NonDestructive) { - # Revert to original values - - # The prior prompt: - if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { - Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt - Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT - } - - # The prior PYTHONHOME: - if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { - Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME - Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME - } - - # The prior PATH: - if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { - Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH - Remove-Item -Path Env:_OLD_VIRTUAL_PATH - } - - # Just remove the VIRTUAL_ENV altogether: - if (Test-Path -Path Env:VIRTUAL_ENV) { - Remove-Item -Path env:VIRTUAL_ENV - } - - # Just remove VIRTUAL_ENV_PROMPT altogether. - if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { - Remove-Item -Path env:VIRTUAL_ENV_PROMPT - } - - # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: - if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { - Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force - } - - # Leave deactivate function in the global namespace if requested: - if (-not $NonDestructive) { - Remove-Item -Path function:deactivate - } -} - -<# -.Description -Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the -given folder, and returns them in a map. - -For each line in the pyvenv.cfg file, if that line can be parsed into exactly -two strings separated by `=` (with any amount of whitespace surrounding the =) -then it is considered a `key = value` line. The left hand string is the key, -the right hand is the value. - -If the value starts with a `'` or a `"` then the first and last character is -stripped from the value before being captured. - -.Parameter ConfigDir -Path to the directory that contains the `pyvenv.cfg` file. -#> -function Get-PyVenvConfig( - [String] - $ConfigDir -) { - Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" - - # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). - $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue - - # An empty map will be returned if no config file is found. - $pyvenvConfig = @{ } - - if ($pyvenvConfigPath) { - - Write-Verbose "File exists, parse `key = value` lines" - $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath - - $pyvenvConfigContent | ForEach-Object { - $keyval = $PSItem -split "\s*=\s*", 2 - if ($keyval[0] -and $keyval[1]) { - $val = $keyval[1] - - # Remove extraneous quotations around a string value. - if ("'""".Contains($val.Substring(0, 1))) { - $val = $val.Substring(1, $val.Length - 2) - } - - $pyvenvConfig[$keyval[0]] = $val - Write-Verbose "Adding Key: '$($keyval[0])'='$val'" - } - } - } - return $pyvenvConfig -} - - -<# Begin Activate script --------------------------------------------------- #> - -# Determine the containing directory of this script -$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition -$VenvExecDir = Get-Item -Path $VenvExecPath - -Write-Verbose "Activation script is located in path: '$VenvExecPath'" -Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" -Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" - -# Set values required in priority: CmdLine, ConfigFile, Default -# First, get the location of the virtual environment, it might not be -# VenvExecDir if specified on the command line. -if ($VenvDir) { - Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" -} -else { - Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." - $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") - Write-Verbose "VenvDir=$VenvDir" -} - -# Next, read the `pyvenv.cfg` file to determine any required value such -# as `prompt`. -$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir - -# Next, set the prompt from the command line, or the config file, or -# just use the name of the virtual environment folder. -if ($Prompt) { - Write-Verbose "Prompt specified as argument, using '$Prompt'" -} -else { - Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" - if ($pyvenvCfg -and $pyvenvCfg['prompt']) { - Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" - $Prompt = $pyvenvCfg['prompt']; - } - else { - Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" - Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" - $Prompt = Split-Path -Path $venvDir -Leaf - } -} - -Write-Verbose "Prompt = '$Prompt'" -Write-Verbose "VenvDir='$VenvDir'" - -# Deactivate any currently active virtual environment, but leave the -# deactivate function in place. -deactivate -nondestructive - -# Now set the environment variable VIRTUAL_ENV, used by many tools to determine -# that there is an activated venv. -$env:VIRTUAL_ENV = $VenvDir - -if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { - - Write-Verbose "Setting prompt to '$Prompt'" - - # Set the prompt to include the env name - # Make sure _OLD_VIRTUAL_PROMPT is global - function global:_OLD_VIRTUAL_PROMPT { "" } - Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT - New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt - - function global:prompt { - Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " - _OLD_VIRTUAL_PROMPT - } - $env:VIRTUAL_ENV_PROMPT = $Prompt -} - -# Clear PYTHONHOME -if (Test-Path -Path Env:PYTHONHOME) { - Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME - Remove-Item -Path Env:PYTHONHOME -} - -# Add the venv to the PATH -Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH -$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" diff --git a/path/to/your/virtualenv/bin/activate b/path/to/your/virtualenv/bin/activate deleted file mode 100644 index e28c6fd..0000000 --- a/path/to/your/virtualenv/bin/activate +++ /dev/null @@ -1,69 +0,0 @@ -# This file must be used with "source bin/activate" *from bash* -# you cannot run it directly - -deactivate () { - # reset old environment variables - if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then - PATH="${_OLD_VIRTUAL_PATH:-}" - export PATH - unset _OLD_VIRTUAL_PATH - fi - if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then - PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" - export PYTHONHOME - unset _OLD_VIRTUAL_PYTHONHOME - fi - - # This should detect bash and zsh, which have a hash command that must - # be called to get it to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected - if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then - hash -r 2> /dev/null - fi - - if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then - PS1="${_OLD_VIRTUAL_PS1:-}" - export PS1 - unset _OLD_VIRTUAL_PS1 - fi - - unset VIRTUAL_ENV - unset VIRTUAL_ENV_PROMPT - if [ ! "${1:-}" = "nondestructive" ] ; then - # Self destruct! - unset -f deactivate - fi -} - -# unset irrelevant variables -deactivate nondestructive - -VIRTUAL_ENV="/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" -export VIRTUAL_ENV - -_OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/bin:$PATH" -export PATH - -# unset PYTHONHOME if set -# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) -# could use `if (set -u; : $PYTHONHOME) ;` in bash -if [ -n "${PYTHONHOME:-}" ] ; then - _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" - unset PYTHONHOME -fi - -if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then - _OLD_VIRTUAL_PS1="${PS1:-}" - PS1="(virtualenv) ${PS1:-}" - export PS1 - VIRTUAL_ENV_PROMPT="(virtualenv) " - export VIRTUAL_ENV_PROMPT -fi - -# This should detect bash and zsh, which have a hash command that must -# be called to get it to forget past commands. Without forgetting -# past commands the $PATH changes we made may not be respected -if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then - hash -r 2> /dev/null -fi diff --git a/path/to/your/virtualenv/bin/activate.csh b/path/to/your/virtualenv/bin/activate.csh deleted file mode 100644 index 41c7753..0000000 --- a/path/to/your/virtualenv/bin/activate.csh +++ /dev/null @@ -1,26 +0,0 @@ -# This file must be used with "source bin/activate.csh" *from csh*. -# You cannot run it directly. -# Created by Davide Di Blasi . -# Ported to Python 3.3 venv by Andrew Svetlov - -alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate' - -# Unset irrelevant variables. -deactivate nondestructive - -setenv VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" - -set _OLD_VIRTUAL_PATH="$PATH" -setenv PATH "$VIRTUAL_ENV/bin:$PATH" - - -set _OLD_VIRTUAL_PROMPT="$prompt" - -if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then - set prompt = "(virtualenv) $prompt" - setenv VIRTUAL_ENV_PROMPT "(virtualenv) " -endif - -alias pydoc python -m pydoc - -rehash diff --git a/path/to/your/virtualenv/bin/activate.fish b/path/to/your/virtualenv/bin/activate.fish deleted file mode 100644 index 54225c9..0000000 --- a/path/to/your/virtualenv/bin/activate.fish +++ /dev/null @@ -1,66 +0,0 @@ -# This file must be used with "source /bin/activate.fish" *from fish* -# (https://fishshell.com/); you cannot run it directly. - -function deactivate -d "Exit virtual environment and return to normal shell environment" - # reset old environment variables - if test -n "$_OLD_VIRTUAL_PATH" - set -gx PATH $_OLD_VIRTUAL_PATH - set -e _OLD_VIRTUAL_PATH - end - if test -n "$_OLD_VIRTUAL_PYTHONHOME" - set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME - set -e _OLD_VIRTUAL_PYTHONHOME - end - - if test -n "$_OLD_FISH_PROMPT_OVERRIDE" - functions -e fish_prompt - set -e _OLD_FISH_PROMPT_OVERRIDE - functions -c _old_fish_prompt fish_prompt - functions -e _old_fish_prompt - end - - set -e VIRTUAL_ENV - set -e VIRTUAL_ENV_PROMPT - if test "$argv[1]" != "nondestructive" - # Self-destruct! - functions -e deactivate - end -end - -# Unset irrelevant variables. -deactivate nondestructive - -set -gx VIRTUAL_ENV "/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv" - -set -gx _OLD_VIRTUAL_PATH $PATH -set -gx PATH "$VIRTUAL_ENV/bin" $PATH - -# Unset PYTHONHOME if set. -if set -q PYTHONHOME - set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME - set -e PYTHONHOME -end - -if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" - # fish uses a function instead of an env var to generate the prompt. - - # Save the current fish_prompt function as the function _old_fish_prompt. - functions -c fish_prompt _old_fish_prompt - - # With the original prompt function renamed, we can override with our own. - function fish_prompt - # Save the return status of the last command. - set -l old_status $status - - # Output the venv prompt; color taken from the blue of the Python logo. - printf "%s%s%s" (set_color 4B8BBE) "(virtualenv) " (set_color normal) - - # Restore the return status of the previous command. - echo "exit $old_status" | . - # Output the original/"old" prompt. - _old_fish_prompt - end - - set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" - set -gx VIRTUAL_ENV_PROMPT "(virtualenv) " -end diff --git a/path/to/your/virtualenv/bin/pip b/path/to/your/virtualenv/bin/pip deleted file mode 100755 index 3944467..0000000 --- a/path/to/your/virtualenv/bin/pip +++ /dev/null @@ -1,8 +0,0 @@ -#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 -# -*- coding: utf-8 -*- -import re -import sys -from pip._internal.cli.main import main -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3 b/path/to/your/virtualenv/bin/pip3 deleted file mode 100755 index 3944467..0000000 --- a/path/to/your/virtualenv/bin/pip3 +++ /dev/null @@ -1,8 +0,0 @@ -#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 -# -*- coding: utf-8 -*- -import re -import sys -from pip._internal.cli.main import main -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3.10 b/path/to/your/virtualenv/bin/pip3.10 deleted file mode 100755 index 3944467..0000000 --- a/path/to/your/virtualenv/bin/pip3.10 +++ /dev/null @@ -1,8 +0,0 @@ -#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 -# -*- coding: utf-8 -*- -import re -import sys -from pip._internal.cli.main import main -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/pip3.11 b/path/to/your/virtualenv/bin/pip3.11 deleted file mode 100755 index 3944467..0000000 --- a/path/to/your/virtualenv/bin/pip3.11 +++ /dev/null @@ -1,8 +0,0 @@ -#!/home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv/bin/python3 -# -*- coding: utf-8 -*- -import re -import sys -from pip._internal.cli.main import main -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/path/to/your/virtualenv/bin/python b/path/to/your/virtualenv/bin/python deleted file mode 120000 index b8a0adb..0000000 --- a/path/to/your/virtualenv/bin/python +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/bin/python3 b/path/to/your/virtualenv/bin/python3 deleted file mode 120000 index 79ab74b..0000000 --- a/path/to/your/virtualenv/bin/python3 +++ /dev/null @@ -1 +0,0 @@ -/usr/local/bin/python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/bin/python3.11 b/path/to/your/virtualenv/bin/python3.11 deleted file mode 120000 index b8a0adb..0000000 --- a/path/to/your/virtualenv/bin/python3.11 +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/path/to/your/virtualenv/lib64 b/path/to/your/virtualenv/lib64 deleted file mode 120000 index 7951405..0000000 --- a/path/to/your/virtualenv/lib64 +++ /dev/null @@ -1 +0,0 @@ -lib \ No newline at end of file diff --git a/path/to/your/virtualenv/pyvenv.cfg b/path/to/your/virtualenv/pyvenv.cfg deleted file mode 100644 index e7250de..0000000 --- a/path/to/your/virtualenv/pyvenv.cfg +++ /dev/null @@ -1,5 +0,0 @@ -home = /usr/local/bin -include-system-site-packages = false -version = 3.11.0 -executable = /usr/local/bin/python3.11 -command = /usr/local/bin/python3 -m venv /home/mihaistate/COMP30030/esbmc_ai_fork/esbmc-ai/path/to/your/virtualenv diff --git a/samples/branches.c b/samples/branches.c new file mode 100644 index 0000000..39dbf4e --- /dev/null +++ b/samples/branches.c @@ -0,0 +1,8 @@ +#include +#include + +int main() { + char* value = "branches"; + assert(strcmp("branches", value) == 0); + return 0; +} \ No newline at end of file diff --git a/tests/test_output_reducer.py b/tests/test_output_reducer.py index 7a2c6f5..b5b8217 100644 --- a/tests/test_output_reducer.py +++ b/tests/test_output_reducer.py @@ -1,12 +1,9 @@ -import re import sys -import nltk import unittest -from nltk.tokenize import word_tokenize,sent_tokenize -print(sys.path) + from esbmc_ai import esbmc_util -nltk.download('punkt') + class TestEsbmcOutputOptimisation(unittest.TestCase): @@ -47,6 +44,7 @@ def test_esbmc_output_optimisation(self): expected_output = '''Starting with the source code, we see that it includes some external functions and declares global variables. These variables are of type int and are initialized with nondeterministic values using the `__VERIFIER_nondet_int()` function. The `assume()` function is then used to specify assumptions about the values of these variables After that, it mentions the number of verification condition checks (VCCs) generated and the number remaining after simplification. + This is the result In this case, ESBMC reports "VERIFICATION SUCCESSFUL" and mentions that a solution was found by the forward condition. This means that all states in the program are reachable. Finally, ESBMC indicates a successful verification.''' @@ -133,23 +131,14 @@ def test_reduce_output2(self): - **"Solution found by the forward condition; all states are reachable (k = 1)"**: ESBMC concluded that, under the given conditions, every state it could consider within one step is reachable without uncovering any logical inconsistencies or errors.''' self.maxDiff = None output_str2 = esbmc_util.reduce_output2(input2) - - #print('Filtered output2:') #print(output_str2) - #print('Expected output2: ') self.assertEqual(output_str2, expected_output2) output_str3 = esbmc_util.reduce_output2(input3) self.assertEqual(output_str3, expected_output3) - #print('Filtered output 3') - #print(output_str3) - #print('Expected output3:') - #print(expected_output3) + output_str4 = esbmc_util.reduce_output2(input4) self.assertEqual(output_str4, expected_output4) - #print('Filtered output 4') - #print(output_str4) - # print('Expected output4:') - # print(expected_output4) + print(output_str4) def test_remove_patterns_nltk(self):