-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathFlipLogGPT.py
138 lines (104 loc) · 5.88 KB
/
FlipLogGPT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python3
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.llms import GPT4All, LlamaCpp
import os
import argparse
import time
load_dotenv()
embeddings_model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")
persist_directory = os.environ.get('PERSIST_DIRECTORY')
model_type = os.environ.get('MODEL_TYPE')
model_path = os.environ.get('MODEL_PATH')
model_n_ctx = os.environ.get('MODEL_N_CTX')
model_n_batch = int(os.environ.get('MODEL_N_BATCH',8))
target_source_chunks = int(os.environ.get('TARGET_SOURCE_CHUNKS',4))
from constants import CHROMA_SETTINGS
def react_for_logs(query):
manual_react = f"""
Find and report which logs breach the compliance policy. Provide actionable insights to mitigate the risk of future breaches.
System Log:
[2023-08-20 12:15:32] [info] [client 192.168.0.1] GET /index.html HTTP/1.1 200 5124
Compliance Breaches:
Successful page request- The log entry shows a successful GET request (GET /index.html) from client IP 192.168.0.1, which returned a 200 status code. This indicates that the client accessed the homepage successfully, and the page size was 5124 bytes.
Actionable Insights:
Regularly review and monitor server configurations to ensure all required files and directories exist and are accessible.
Implement proper access controls and permissions to restrict unauthorized access to sensitive files or system resources.
Find and report which logs breach the compliance policy. Provide actionable insights to mitigate the risk of future breaches.
System Log:
[2023-08-20 12:17:45] [error] [client 192.168.0.2] File does not exist: /var/www/html/includes/config.php
Compliance Breaches:
Missing file- The log entry indicates an error where the client at IP 192.168.0.2 requested a file (/var/www/html/includes/config.php) that does not exist. This could be an indication of a misconfiguration or an attempt to access sensitive files.
Actionable Insights:
Review the server configuration to ensure that the file path is correct and that sensitive files are not accessible to the public.
Monitor the IP address 192.168.0.2 for further suspicious activity or repeated attempts to access sensitive files.
Find and report which logs breach the compliance policy. Provide actionable insights to mitigate the risk of future breaches.
System Log:
[2023-08-20 12:19:10] [info] [client 192.168.0.3] POST /login.php HTTP/1.1 302 0
Compliance Breaches:
The log entry shows an info-level log indicating a POST request (POST /login.php) from client IP 192.168.0.3, which resulted in a 302 status code. This suggests a form submission, likely for user authentication or login.
Actionable Insights:
No immediate action is required unless there are suspicious patterns associated with this IP address.
Find and report which logs breach the compliance policy. Provide actionable insights to mitigate the risk of future breaches.
System Log:
{query}
"""
return manual_react
def react_for_system_policies(query):
manual_react = f"""
List out all the possible system policies that are being violated.
{query}
"""
return manual_react
def main():
# Parse the command line arguments
args = parse_arguments()
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
# activate/deactivate the streaming StdOut callback for LLMs
callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]
# Prepare the LLM
match model_type:
case "LlamaCpp":
llm = LlamaCpp(model_path=model_path, max_tokens=model_n_ctx, n_batch=model_n_batch, callbacks=callbacks, verbose=False)
case "GPT4All":
llm = GPT4All(model=model_path, max_tokens=model_n_ctx, backend='gptj', n_batch=model_n_batch, callbacks=callbacks, verbose=False)
case _default:
# raise exception if model_type is not supported
raise Exception(f"Model type {model_type} is not supported. Please choose one of the following: LlamaCpp, GPT4All")
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents= not args.hide_source)
# Interactive questions and answers
while True:
query = input("\nEnter a query: ")
if query == "exit":
break
if query.strip() == "":
continue
# Get the answer from the chain
start = time.time()
res = qa(react_for_logs(query))
answer, docs = res['result'], [] if args.hide_source else res['source_documents']
end = time.time()
# Print the result
print("\n\n> Question:")
print(query)
print(f"\n> Answer (took {round(end - start, 2)} s.):")
print(answer)
# Print the relevant sources used for the answer
for document in docs:
print("\n> " + document.metadata["source"] + ":")
print(document.page_content)
def parse_arguments():
parser = argparse.ArgumentParser(description='Interactive LLM for logs and security analysis with vectorstores')
parser.add_argument("--hide-source", "-S", action='store_true',
help='Use this flag to disable printing of source documents used for answers.')
parser.add_argument("--mute-stream", "-M",
action='store_true',
help='Use this flag to disable the streaming StdOut callback for LLMs.')
return parser.parse_args()
if __name__ == "__main__":
main()