-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
100 lines (73 loc) · 3.21 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import math
import re
import string
from collections import Counter

from flask import Flask, request, render_template
# Pass the module's real ``__name__`` (not the literal string "__name__") so
# Flask can resolve this module's location when looking up templates and
# static files.
app = Flask(__name__)
# Cut-off values consulted by the POST handler when labelling a submission.

# Match percentage at or above which plagiarism is reported.
plagiarism_threshold = 10

# AI-text percentage above which AI text is reported as detected.
ai_detection_threshold = 10

# AI-text percentage above which the text is flagged as AI-generated.
# NOTE(review): 0.8 looks like a fraction, yet it is compared against a
# value on a 0-100 scale -- confirm whether 80 was intended.
ai_generated_threshold = 0.8
def calculate_ai_percentage(text):
    """Return the share of repeated words in *text* as a percentage.

    ASCII punctuation and digits are removed, the remainder is lower-cased
    and split on whitespace, and the score is
    ``(1 - unique_words / total_words) * 100``.

    Args:
        text: The text to score.

    Returns:
        A float in ``[0, 100)``; ``0.0`` when no words remain after the
        punctuation and digits have been removed.
    """
    # Delete characters literally with str.translate. Interpolating
    # string.punctuation into a regex character class is fragile: its "\]"
    # sequence is read as an escape, so backslashes were never removed.
    strip_table = str.maketrans("", "", string.punctuation + string.digits)
    words = text.translate(strip_table).lower().split()

    # Nothing left to measure (empty or punctuation/digit-only input):
    # report 0% instead of dividing by zero.
    if not words:
        return 0.0

    unique_ratio = len(set(words)) / len(words)
    return (1 - unique_ratio) * 100
@app.route("/")
def loadPage():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_template = 'index.html'
    return render_template(landing_template)
@app.route("/", methods=['POST'])
def detect_plagiarism_and_ai_text():
    """Score the submitted query against ``database1.txt`` and render the result.

    Reads the ``query`` form field, computes the cosine similarity (as a
    percentage) between the word-count vectors of the query and of
    ``database1.txt``, collects the words present in both, and scores the
    query with ``calculate_ai_percentage``.

    Returns:
        The rendered ``index.html`` template on success, or a plain
        ``"Error occurred: ..."`` string if any step raises.
    """
    try:
        inputQuery = request.form['query']

        # Replace every non-word character by a space, then split into words.
        queryWordList = re.sub(r"[^\w]", " ", inputQuery.lower()).split()

        # The context manager closes the file even if reading fails.
        with open("database1.txt", "r") as fd:
            database1 = fd.read().lower()
        databaseWordList = re.sub(r"[^\w]", " ", database1).split()

        # Term frequencies. A Counter yields 0 for words it has not seen, so
        # only the query's words can contribute to the dot product.
        queryTF = Counter(queryWordList)
        databaseTF = Counter(databaseWordList)

        dotProduct = sum(
            count * databaseTF[word] for word, count in queryTF.items()
        )
        queryVectorMagnitude = math.sqrt(sum(tf ** 2 for tf in queryTF.values()))
        databaseVectorMagnitude = math.sqrt(
            sum(tf ** 2 for tf in databaseTF.values())
        )

        # An empty query or database has a zero-length vector; report no
        # match rather than dividing by zero.
        magnitudeProduct = queryVectorMagnitude * databaseVectorMagnitude
        if magnitudeProduct:
            matchPercentage = (dotProduct / magnitudeProduct) * 100
        else:
            matchPercentage = 0.0

        # Words occurring in both texts, sorted so the output is stable.
        plagiarizedTexts = sorted(queryTF.keys() & databaseTF.keys())

        output = ""
        plagiarism_status = ""
        if matchPercentage >= plagiarism_threshold:
            plagiarism_status = "Plagiarism Detected"
        elif matchPercentage > 0:
            plagiarism_status = "Limited Plagiarism"

        try:
            ai_percentage = calculate_ai_percentage(inputQuery)
        except ZeroDivisionError:
            # Guard in case the helper divides by zero on a query that
            # contains no countable words.
            ai_percentage = 0.0

        # NOTE(review): ai_generated_threshold is 0.8 while ai_percentage is
        # on a 0-100 scale -- confirm the intended cut-off.
        is_ai_text = ai_percentage > ai_generated_threshold
        ai_text_detected = ai_percentage > ai_detection_threshold

        return render_template(
            'index.html',
            query=inputQuery,
            percentage=matchPercentage,
            output=output,
            plagiarized_texts=plagiarizedTexts,
            ai_text=inputQuery,
            ai_percentage=ai_percentage,
            is_ai_text=is_ai_text,
            ai_text_detected=ai_text_detected,
            plagiarism_status=plagiarism_status
        )
    except Exception as e:
        # Top-level boundary of the request: record the traceback instead of
        # silently swallowing it, then keep the original user-facing message.
        app.logger.exception("Plagiarism/AI-text detection failed")
        return "Error occurred: " + str(e)
# Start Flask's built-in server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    app.run()