-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfake_news_detection.py
50 lines (37 loc) · 1.51 KB
/
fake_news_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#Importing the libraries
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
# Train and evaluate a fake-news text classifier, then save it to disk.
#
# Reads 'news.csv', fits a TF-IDF + Multinomial Naive Bayes pipeline on 80% of
# the rows, prints evaluation metrics for the remaining 20%, and writes the
# fitted pipeline to 'model.pickle'.

# Load the cleaned dataset; only the 'text' and 'label' columns are used.
news = pd.read_csv('news.csv')
X = news['text']
y = news['label']

# Split the data into training (80%) and test (20%) sets. The fixed seed makes
# the split -- and therefore the printed metrics and the saved model --
# reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Pipeline: TF-IDF features (with English stop words removed) feeding a
# Multinomial Naive Bayes classifier. Keeping the vectorizer inside the
# pipeline means the pickled object accepts raw text at prediction time.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('nbmodel', MultinomialNB()),
])

# Fit the vectorizer and the classifier on the training split only.
pipeline.fit(X_train, y_train)

# Predict labels for the held-out test split.
pred = pipeline.predict(X_test)

# Report per-class precision/recall/F1 and the confusion matrix.
print(classification_report(y_test, pred))
print(confusion_matrix(y_test, pred))

# Serialise the fitted pipeline. The body of the `with` block must be indented
# so the file handle is still open when pickle writes to it.
with open('model.pickle', 'wb') as handle:
    pickle.dump(pipeline, handle, protocol=pickle.HIGHEST_PROTOCOL)
'''
Sample output from a previous run:

              precision    recall  f1-score   support

        FAKE       0.98      0.63      0.77       678
        REAL       0.70      0.99      0.82       589

    accuracy                           0.80      1267
   macro avg       0.84      0.81      0.79      1267
weighted avg       0.85      0.80      0.79      1267

[[429 249]
 [  8 581]]
'''