Skip to content

Commit

Permalink
Merge pull request #258 from COS301-SE-2024/develop
Browse files Browse the repository at this point in the history
Develop into Main
  • Loading branch information
Yudi-G authored Oct 18, 2024
2 parents e2b2f2f + 8f0d8ff commit 20eeabe
Show file tree
Hide file tree
Showing 77 changed files with 4,442 additions and 1,985 deletions.
Binary file modified .DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion .github/workflows/frontendTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ jobs:
cache: 'npm'
cache-dependency-path: gnd-app/package-lock.json

- name: Install Chrome
- name: Install Chrome and install pandoc
run: |
sudo apt-get update
sudo apt-get install -y google-chrome-stable
sudo apt-get install pandoc
- run: |
cd gnd-app
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ env
.env
.vs/
.pyc
# /gnd-app/src/environments/environment.ts
# /gnd-app/src/environments/environment.prod.ts
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![image](https://github.com/user-attachments/assets/98c03af9-5f20-4d6e-8140-b4a69f7ff72f)<img src="https://github.com/COS301-SE-2024/GDPR-data-noncompliance-detector/blob/develop/documentation/headshots/GND.png" alt="GND Logo">
<img src="https://github.com/COS301-SE-2024/GDPR-data-noncompliance-detector/blob/develop/documentation/headshots/GND.png" alt="GND Logo">

[![codecov](https://codecov.io/github/COS301-SE-2024/GDPR-data-noncompliance-detector/graph/badge.svg?token=nEPpXWGssM)](https://codecov.io/github/COS301-SE-2024/GDPR-data-noncompliance-detector)
![GitHub issues](https://img.shields.io/github/issues/COS301-SE-2024/GDPR-data-noncompliance-detector)
Expand Down Expand Up @@ -42,7 +42,9 @@ The GDPR Data Noncompliance Detector is a software tool designed to identify ins
<br></br>
# Download

[EXE Download]([https://drive.google.com/drive/folders/1LANx27MNDwmfvDtj09NFvJ05s91Ya-7n?usp=sharing](https://drive.google.com/file/d/1-zytmykMq8Prb-izSKI4gjCQbvzhBZk9/view?usp=drive_link))

[https://drive.google.com/drive/folders/1LANx27MNDwmfvDtj09NFvJ05s91Ya-7n?usp=sharing](https://drive.google.com/file/d/1WV0WCWq1VSLAgREVHJ523JzclZXMwBnW/view?usp=sharing)


# Demo 4 Input

Expand All @@ -67,6 +69,10 @@ The GDPR Data Noncompliance Detector is a software tool designed to identify ins
## User Manual
[GND User Manual](https://github.com/COS301-SE-2024/GDPR-data-noncompliance-detector/blob/develop/documentation/GND%20Manual_V2.pdf)

## Machine Learning Training Specification
[AIML_Spec.pdf](https://github.com/user-attachments/files/17433721/AIML_Spec.pdf)


## Project Management Tools
GitHub Issues and GitHub Boards

Expand Down
Binary file modified backend/.DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion backend/Detection_Engine/RAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def interpret_findings(self, labels):
'LABEL_4': ['6','7(1)','7(2)','7(3)'],
'LABEL_5': ['12']
}

#['5','6',['9','9(1)','9(2)(b)','9(2)(g)']]
unique_labels = set()
count = 0

Expand Down
28 changes: 27 additions & 1 deletion backend/Detection_Engine/biometric_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import sys
import os
import glob
from gradio_client import Client, handle_file
import re


# below added because of the relative import error
Expand Down Expand Up @@ -80,6 +82,28 @@ def biometric_detect_people(self, source):
if item['label'] != 'person':
output.remove(item)
return output


def biometric_detect_finger(self, source):
    """Ask the remote anti-spoofing model whether an image shows a fingerprint.

    The image is sent to the
    ``abdullahsajid/multimodal-antispoofing-detection`` Gradio Space and the
    "Real" and "Spoof" percentages are parsed out of the returned markup.

    Args:
        source: Image file to analyse; forwarded to ``handle_file``.

    Returns:
        True when either the "Real" or the "Spoof" score is above 75,
        otherwise False. A score that cannot be found in the response is
        treated as 0.0, so an unparseable response yields False instead of
        raising ``TypeError`` on a ``None > 75`` comparison.
    """
    client = Client("abdullahsajid/multimodal-antispoofing-detection")
    result = client.predict(
        image=handle_file(source),
        api_name="/handle_button_click_finger"
    )

    real_pattern = re.compile(r'Real:</b>\s*(\d+\.\d+)%')
    spoof_pattern = re.compile(r'Spoof:</b>\s*(\d+\.\d+)%')

    real_match = real_pattern.search(result)
    spoof_match = spoof_pattern.search(result)

    # A missing score must not take part in the comparison as None.
    real_percentage = float(real_match.group(1)) if real_match else 0.0
    spoof_percentage = float(spoof_match.group(1)) if spoof_match else 0.0

    # NOTE(review): presumably a confident verdict in either direction means
    # a fingerprint (genuine or spoofed) is present -- confirm with the model
    # owner.
    return real_percentage > 75 or spoof_percentage > 75



def biometric_detect_all(self,pdf_path):
# clean up folders
Expand All @@ -96,6 +120,8 @@ def biometric_detect_all(self,pdf_path):
count = 0
for image in images:
people = self.biometric_detect_people(image)
# if(self.biometric_detect_finger(image)):
# count += 1
for person in people:
count += 1

Expand Down Expand Up @@ -180,5 +206,5 @@ def biometric_detect_all(self,pdf_path):
# print(biometric_detect_all("../mockdata/excelWimages.xlsx"))
# Example for accessing mock data if it's bundled with PyInstaller
bm = biometric_detection()
print(bm.biometric_detect_people("../mockdata/p7.png"))
print(bm.biometric_detect_finger("../mockdata/p3.png"))
# extract_images_from_excel("../mockdata/excelWimages.xlsx")
103 changes: 88 additions & 15 deletions backend/Detection_Engine/detection_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import math
import base64
import concurrent.futures

class detection_engine:

Expand Down Expand Up @@ -58,20 +59,74 @@ def flag(self, ner_result, reg_result_contact,reg_result_financial,reg_result_pe
ner_val = self.extract_number(ner_result)
total = ner_val + reg_result_contact + reg_result_financial + reg_result_personal + gi_result + em_result

def generate_location_report(self, text):
    """Return ``self.report_generator.location_report_generation(text)``."""
    location_report = self.report_generator.location_report_generation(text)
    return location_report

def generate_ner_report(self, text):
    """Return ``self.report_generator.ner_report(text)``."""
    ner_report = self.report_generator.ner_report(text)
    return ner_report

def generate_regex_report_personal(self, text):
    """Return ``self.regex_report_personal(text)``."""
    personal_report = self.regex_report_personal(text)
    return personal_report

def generate_regex_report_financial(self, text):
    """Return ``self.regex_report_financial(text)``."""
    financial_report = self.regex_report_financial(text)
    return financial_report

def generate_regex_report_contact(self, text):
    """Return ``self.regex_report_contact(text)``."""
    contact_report = self.regex_report_contact(text)
    return contact_report

def generate_ca_report(self, text):
    """Return ``self.report_generator.CA_report(text)``."""
    ca_report = self.report_generator.CA_report(text)
    return ca_report

def generate_gi_report(self, text):
    """Return ``self.report_generator.gen_report(text)``."""
    gi_report = self.report_generator.gen_report(text)
    return gi_report

def generate_em_report(self, text):
    """Return ``self.report_generator.EM_report(text)``."""
    em_report = self.report_generator.EM_report(text)
    return em_report

def generate_md_report(self, text):
    """Return ``self.report_generator.MD_report(text)``."""
    md_report = self.report_generator.MD_report(text)
    return md_report

def generate_image_report(self, path_):
    """Return ``self.report_generator.Image_report_generation(path_)``."""
    image_report = self.report_generator.Image_report_generation(path_)
    return image_report


def process(self, path, path_):

text = path

location = self.report_generator.location_report_generation(text)
ner_result = self.report_generator.ner_report(text)
with concurrent.futures.ThreadPoolExecutor() as executor:
future_location = executor.submit(self.generate_location_report, text)
future_ner = executor.submit(self.generate_ner_report, text)
future_reg_personal = executor.submit(self.generate_regex_report_personal, text)
future_reg_financial = executor.submit(self.generate_regex_report_financial, text)
future_reg_contact = executor.submit(self.generate_regex_report_contact, text)
future_ca = executor.submit(self.generate_ca_report, text)
future_gi = executor.submit(self.generate_gi_report, text)
future_em = executor.submit(self.generate_em_report, text)
# future_md = executor.submit(self.generate_md_report, text)

location = future_location.result()
ner_result = future_ner.result()
reg_result_personal = future_reg_personal.result()
reg_result_financial = future_reg_financial.result()
reg_result_contact = future_reg_contact.result()
ca_statement = future_ca.result()
gi_result = future_gi.result()
em_result = 50000
# em_result = future_em.result()
md_result = 50000
# md_result = future_md.result()

# location = self.report_generator.location_report_generation(text)
# ner_result = self.report_generator.ner_report(text)
# location = self.determine_country_of_origin(path)
reg_result_personal = self.regex_report_personal(text)
reg_result_financial = self.regex_report_financial(text)
reg_result_contact = self.regex_report_contact(text)
ca_statement = self.report_generator.CA_report(text)
gi_result = self.report_generator.gen_report(text)
em_result = self.report_generator.EM_report(text)
md_result = self.report_generator.MD_report(text)
# reg_result_personal = self.regex_report_personal(text)
# reg_result_financial = self.regex_report_financial(text)
# reg_result_contact = self.regex_report_contact(text)
# ca_statement = self.report_generator.CA_report(text)
# gi_result = self.report_generator.gen_report(text)
# em_result = self.report_generator.EM_report(text)
# md_result = self.report_generator.MD_report(text)
status = ""

if self.flag(ner_result, reg_result_contact,reg_result_financial,reg_result_personal, gi_result, em_result) == 0:
Expand Down Expand Up @@ -182,20 +237,38 @@ def get_status(self, ner_count, personal_data,financial_data, contact_data, medi

def report_generation(self, path, path_):

text = path
location_report = 0
reg_result_personal_report = 0
# ca_statement_report = 0
em_result_report = 0
image_result_report = 0

location_report = self.report_generator.location_report_generation(text)
text = path
with concurrent.futures.ThreadPoolExecutor() as executor:
future_location = executor.submit(self.generate_location_report, text)
future_reg_personal = executor.submit(self.generate_regex_report_personal, text)
future_ca = executor.submit(self.generate_ca_report, text)
future_em = executor.submit(self.generate_em_report, text)
future_image = executor.submit(self.generate_image_report,path_)

location_report = future_location.result()
reg_result_personal_report = future_reg_personal.result()
# ca_statement_report = future_ca.result()
em_result_report = future_em.result()
image_result_report = future_image.result()

# location_report = self.report_generator.location_report_generation(text)
ner_result_report = self.report_generator.ner_report_generation(text)
reg_result_personal_report = self.regex_report_personal(text) + self.report_generator.gen_report(text)
# reg_result_personal_report = self.regex_report_personal(text) + self.report_generator.gen_report(text)
reg_result_financial_report = self.regex_report_financial(text)
reg_result_contact_report = self.regex_report_contact(text)
ca_statement_report = self.report_generator.CA_report_generation(text)
gi_result_report = self.report_generator.GF_report(text)
em_result_report = self.report_generator.EM_report(text)
# em_result_report = self.report_generator.EM_report(text)
md_result_report = self.report_generator.MD_report(text)
image_result_report = self.report_generator.Image_report_generation(path_)
# image_result_report = self.report_generator.Image_report_generation(path_)
rag_stat, rag_count = self.report_generator.RAG_report(ner_result_report , reg_result_personal_report, reg_result_financial_report, reg_result_contact_report, md_result_report,ca_statement_report, gi_result_report, em_result_report, image_result_report)

# ca_statement_report = self.report_generator.CA_report_generation(text)
# ner_result_text = self.report_generator.ner_report_text(text)
ner_pdf_bytes = self.report_generator.ner_report_text(text, path_)
pdf_base64 = base64.b64encode(ner_pdf_bytes.read()).decode('utf-8')
Expand Down
5 changes: 2 additions & 3 deletions backend/Detection_Engine/report_generation_layer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from Detection_Engine.text_classification_layer import text_classification_layer
from Detection_Engine.biometric_detection import biometric_detection
from Detection_Engine.lang_detection import location_finder
from langcodes import Language

from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
Expand Down Expand Up @@ -189,8 +188,8 @@ def RAG_report(self, ner_result , personal, financial, contact, medical, ca_stat
categories.append('Biometric Data')

rag_res, rag_count = self.classification_layer.run_RAG(categories)
result = "The following GDPR articles are potentially violated: " + ", ".join(rag_res)
return result, rag_count
# result = "The following GDPR articles are potentially violated: " + ", ".join(rag_res)
return rag_res, rag_count



Expand Down
Loading

0 comments on commit 20eeabe

Please sign in to comment.