-
Notifications
You must be signed in to change notification settings - Fork 3
/
main.py
285 lines (261 loc) · 16.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# Copyright 2024 Fondazione Bruno Kessler
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import streamlit as st
import graphviz
import random
import json
from tabs.sidebar import sidebar
from tabs.application_info import application_info
from tabs.dfd import dfd
from tabs.threat_model import threat_model
from tabs.linddun_go import linddun_go
from tabs.linddun_pro import linddun_pro
from tabs.risk_assessment import risk_assessment
from tabs.report import report
def init_session_state():
    """
    Initialize every session state variable the application relies on.

    Each variable is created only if it is not already present in
    ``st.session_state``, so calling this function repeatedly never
    overwrites existing values. The comments below describe the structure of
    each variable. If the code uses a session state variable that is not
    initialized here, it is probably bound to a specific streamlit element
    through its "key" parameter: in that case the variable is created together
    with the element and always mirrors its value. Look for the element in the
    corresponding tab file to understand how the variable acts.
    """
    # Imported locally so that this function does not depend on a change to
    # the module-level import block.
    import copy

    # ------------------------------------------------------------------
    # Sidebar
    # ------------------------------------------------------------------
    if "keys" not in st.session_state:
        # keys is a dictionary that will store the API keys, indexed such as
        # "openai_api_key", "google_api_key", "mistral_api_key"
        st.session_state["keys"] = {}
    if "openai_model" not in st.session_state:
        # openai_model is a string that will store the OpenAI model to use
        st.session_state["openai_model"] = "gpt-4o-mini"
    if "google_model" not in st.session_state:
        # google_model is a string that will store the Google AI model to use
        st.session_state["google_model"] = "gemini-1.5-pro-latest"
    if "mistral_model" not in st.session_state:
        # mistral_model is a string that will store the Mistral model to use
        st.session_state["mistral_model"] = "mistral-large-latest"

    # ------------------------------------------------------------------
    # Application Info and DFD tabs
    # ------------------------------------------------------------------
    if "input" not in st.session_state:
        # "input" is a dictionary that stores all the user input for the
        # application information. It has the following keys:
        # - app_description: string. A detailed description of the application
        # - app_type: string. The type of the application
        # - types_of_data: list. The types of data collected by the application
        # - has_database: bool. Whether the application describes the data
        #       collected
        # - database: list of dict. The type of data stored in the database.
        #       Each dict has the following keys:
        #     - data_type: string. The type of data stored in the database
        #     - encryption: bool. Whether the data type is encrypted
        #     - sensitive: bool. Whether the data is considered sensitive
        #     - third_party: bool. Presumably whether the data is shared with
        #           third parties (TODO confirm against the tab that edits it)
        #     - storage_location: string. Presumably where the data is stored
        #           (TODO confirm)
        #     - purpose: string. Presumably why the data is collected
        #           (TODO confirm)
        #     - notes: string. Additional information about the data type
        # - data_policy: string. The data retention and deletion policy of the
        #       application
        # - user_data_control: string. The actions the user can perform on
        #       their data
        # - dfd: list of dict. The Data Flow Diagram of the application. Each
        #       dict has the following keys:
        #     - from: string. The entity where the data flow starts
        #     - typefrom: string. The type of the entity where the data flow
        #           starts
        #     - to: string. The entity where the data flow ends
        #     - typeto: string. The type of the entity where the data flow ends
        #     - trusted: bool. Whether the data flow is trusted
        # - graph: graphviz.Digraph. The graph representation of the Data Flow
        #       Diagram, as a graphviz Digraph object
        default_data_types = [
            "Name",
            "Email",
            "Password",
            "Address",
            "Location",
            "Phone number",
            "Date of Birth",
            "ID card number",
            "Last access time",
        ]
        # Every default row starts with the same flags; a fresh dict is built
        # per row so the rows never share state with each other.
        default_database = [
            {
                "data_type": data_type,
                "encryption": True,
                "sensitive": True,
                "third_party": False,
                "storage_location": "",
                "purpose": "",
                "notes": "",
            }
            for data_type in default_data_types
        ]

        graph = graphviz.Digraph()
        # str() yields the same text the original f-string produced, without
        # nesting double quotes inside a double-quoted f-string (which is a
        # syntax error before Python 3.12).
        # NOTE(review): if no theme background color is configured the option
        # may be None, which would be passed on as the string "None" - confirm
        # this is acceptable for graphviz.
        graph.attr(
            bgcolor=str(st.get_option("theme.backgroundColor")),
        )

        # The dictionary is assigned in one step, so a failure above never
        # leaves a half-initialized "input" entry behind.
        st.session_state["input"] = {
            "app_description": "",
            "app_type": "",
            "types_of_data": [],
            "has_database": False,
            "database": default_database,
            "data_policy": "",
            "user_data_control": "",
            "dfd": [
                {
                    "from": "User",
                    "typefrom": "Entity",
                    "to": "Application",
                    "typeto": "Process",
                    "trusted": True,
                },
            ],
            "graph": graph,
        }
    if "backup_database" not in st.session_state:
        # "backup_database" is a list of dictionaries that stores the backup
        # of the database information, to be able to restore it if needed.
        # A deep copy is required: a shallow list copy would share the row
        # dictionaries with the live table, so editing a row in place would
        # silently modify the backup as well.
        st.session_state["backup_database"] = copy.deepcopy(
            st.session_state["input"]["database"]
        )
    if "dfd_only" not in st.session_state:
        # "dfd_only" is a boolean that indicates whether only the DFD is
        # needed, in order to disable the application description
        st.session_state["dfd_only"] = False
    if "is_graph_generated" not in st.session_state:
        # "is_graph_generated" is a boolean that indicates whether the graph
        # has already been generated, to know if it has been updated at least
        # once
        st.session_state["is_graph_generated"] = False
    if "graph_seed" not in st.session_state:
        # "graph_seed" is a string that stores a random seed to generate the
        # graph, such that it changes every time the graph is updated
        st.session_state["graph_seed"] = str(random.randint(0, 100))

    # ------------------------------------------------------------------
    # Threat Model tab
    # ------------------------------------------------------------------
    if "threat_model_output" not in st.session_state:
        # "threat_model_output" is a string that will store the Markdown
        # output of the threat model
        st.session_state["threat_model_output"] = ""
    if "threat_model_threats" not in st.session_state:
        # "threat_model_threats" is a list of dictionaries that will store the
        # JSON output of the threat model. Each dictionary represents a
        # threat, and contains the following keys:
        # - "title": string. The title of the threat.
        # - "threat_type": string. The LINDDUN category of the threat, such as
        #       "L - Linking"
        # - "Scenario": string. The scenario in which the threat occurs.
        # - "Reason": string. The reason for the detection of the threat.
        st.session_state["threat_model_threats"] = []

    # ------------------------------------------------------------------
    # LINDDUN Go tab
    # ------------------------------------------------------------------
    if "linddun_go_output" not in st.session_state:
        # "linddun_go_output" is a string that stores the Markdown output of
        # the LINDDUN Go simulation
        st.session_state["linddun_go_output"] = ""
    if "linddun_go_threats" not in st.session_state:
        # "linddun_go_threats" is a list of dictionaries that stores the
        # threats generated by the LINDDUN Go simulation. Each dictionary
        # represents a threat and contains the following keys:
        # - "question": string. The questions on the card, asked to the LLM to
        #       elicit the threat.
        # - "threat_title": string. The title of the threat.
        # - "threat_description": string. The description of the threat.
        # - "threat_type": int. The LINDDUN category of the threat, from 1 to
        #       7.
        # - "reply": boolean. Whether the threat was deemed present or not in
        #       the application by the LLM.
        # - "reason": string. The reason for the detection or non-detection of
        #       the threat.
        st.session_state["linddun_go_threats"] = []
    if "max_threats" not in st.session_state:
        # "max_threats" is an integer that stores the maximum number of
        # threats that can be analyzed in the LINDDUN Go simulation. It is
        # used to set the slider for the number of threats to analyze, and is
        # determined by the total number of cards in the LINDDUN Go deck.
        # The encoding is explicit so the deck is decoded the same way on
        # every platform instead of following the locale default.
        with open("misc/deck.json", "r", encoding="utf-8") as deck_file:
            deck = json.load(deck_file)
        st.session_state["max_threats"] = len(deck["cards"])

    # ------------------------------------------------------------------
    # LINDDUN Pro tab
    # ------------------------------------------------------------------
    if "linddun_pro_output" not in st.session_state:
        # "linddun_pro_output" is a string used to store the markdown output
        # of the LINDDUN Pro threat model
        st.session_state["linddun_pro_output"] = ""
    if "linddun_pro_threats" not in st.session_state:
        # "linddun_pro_threats" is a list of lists of dictionaries used to
        # store the threats for each edge in the DFD. The list has the same
        # length as the DFD, and each element is a list of threats for the
        # corresponding edge, one for each of the LINDDUN categories. Thus,
        # the structure is a matrix of N rows (one for each edge) and 7
        # columns (one for each LINDDUN category), where each cell is a
        # dictionary with the threat information.
        # The dictionary contains the following keys:
        # - "category": string. The category of the threat, such as "Linking".
        # - "source_id": string. The ID of the source of the threat.
        # - "source_title": string. The title of the threat at the source.
        # - "source": string. The description of the threat at the source.
        # - "data_flow_id": string. The ID of the data flow of the threat.
        # - "data_flow_title": string. The title of the threat at the data
        #       flow.
        # - "data_flow": string. The description of the threat at the data
        #       flow.
        # - "destination_id": string. The ID of the destination of the threat.
        # - "destination_title": string. The title of the threat at the
        #       destination.
        # - "destination": string. The description of the threat at the
        #       destination.
        # - "edge": dictionary. The edge of the DFD that the threat is
        #       associated with, with the same keys as the DFD edge.
        st.session_state["linddun_pro_threats"] = []

    # ------------------------------------------------------------------
    # Risk Assessment tab
    # ------------------------------------------------------------------
    if "to_assess" not in st.session_state:
        # "to_assess" is a list of dictionaries used to store the threats to
        # assess. Each dictionary can contain different keys depending on the
        # threat elicitation method that has been used. For Threat Model and
        # LINDDUN Go, the dictionaries contain the same keys as
        # "threat_model_threats" and "linddun_go_threats", respectively.
        # For LINDDUN Pro, the dictionaries contain the following keys:
        # - "category": string. The category of the threat.
        # - "description": string. The description of the threat.
        # - "edge": dictionary. The edge of the DFD that the threat is
        #       associated with, with the same keys as the DFD edge.
        # - "threat_tree_node": string. The nodes of the threat tree involved
        #       in the threat.
        # - "threat_title": string. The title of the threat.
        # - "threat_location": string. The location of the threat in the DFD
        #       edge (source, data_flow, or destination).
        # - "data_flow_number": integer. The number of the data flow in the
        #       DFD edge
        st.session_state["to_assess"] = []
    if "current_threat" not in st.session_state:
        # "current_threat" is an integer used to store the index of the
        # current threat being assessed.
        st.session_state["current_threat"] = 0
    if "threat_source" not in st.session_state:
        # "threat_source" is a string used to store the source of the threats
        # being assessed.
        st.session_state["threat_source"] = ""
    if "assessments" not in st.session_state:
        # "assessments" is a list of dictionaries used to store the impact
        # assessments of the threats. Each dictionary contains only one key
        # (in the future, it could be expanded to include more information):
        # - "impact": string. The impact of the threat on the system.
        st.session_state["assessments"] = []
    if "control_measures" not in st.session_state:
        # "control_measures" is a list of lists of dictionaries used to store
        # the control measures for the threats. The list has the same length
        # as the "to_assess" list, and each element is a list of dictionaries
        # representing control measures for the corresponding threat. Thus,
        # the structure is a matrix of N rows (one for each threat) and M
        # columns (one for each control measure), where each cell is a
        # dictionary with the control measure information.
        # Each dictionary contains the following keys:
        # - "filename": string. The filename of the control measure on the
        #       Privacy Patterns website.
        # - "title": string. The title of the control measure.
        # - "explanation": string. The explanation of the control measure.
        # - "implementation": string. The implementation of the control
        #       measure.
        st.session_state["control_measures"] = []
    if "to_report" not in st.session_state:
        # "to_report" is a list of booleans used to store whether each threat
        # should be included in the report.
        st.session_state["to_report"] = []
# Text shown by the "About" entry of the application menu. It is kept in a
# named constant so the page configuration call below stays short.
ABOUT_TEXT = """
**PILLAR** (**P**rivacy risk **I**dentification with **L**INDDUN and **L**LM
**A**nalysis **R**eport) is a tool developed by [Andrea Bissoli](https://www.linkedin.com/in/andrea-bissoli/)
under the supervision of
[Dr. Majid Mollaeefar](https://www.linkedin.com/in/majid-mollaeefar/) as an
internship project for [Fondazione Bruno Kessler](https://www.fbk.eu/). The
tool is designed to help developers and security professionals to assess the
privacy and information leakage risks of their applications. It provides a
user-friendly interface to create Data Flow Diagrams, generate threat models,
and perform risk assessments based on the LINDDUN methodology. The tool is
open-source and can be found on
[GitHub](https://github.com/AndreaBissoli/PILLAR)."""

# Page-level Streamlit configuration; issued before any other Streamlit call
# made by this script.
st.set_page_config(
    page_title="P.I.L.L.A.R.",
    page_icon="images/logo1.png",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        "Report a bug": "https://github.com/AndreaBissoli/PILLAR/issues",
        "About": ABOUT_TEXT,
    },
)

# Create all session state variables before any UI element reads them.
init_session_state()

# Draw the sidebar first, then one tab per page of the application.
sidebar()

# Each entry pairs a tab label with the function that renders its content;
# the order here is the order in which the tabs are shown and rendered.
PAGES = [
    ("Application Info", application_info),
    ("DFD", dfd),
    ("Threat Model", threat_model),
    ("LINDDUN Go", linddun_go),
    ("LINDDUN Pro", linddun_pro),
    ("Risk Assessment", risk_assessment),
    ("Report", report),
]

tab_containers = st.tabs(
    [label for label, _ in PAGES],
)
for container, (_, render_page) in zip(tab_containers, PAGES):
    with container:
        render_page()