-
Notifications
You must be signed in to change notification settings - Fork 0
/
use model.py
75 lines (59 loc) · 3.2 KB
/
use model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
from datetime import datetime
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
# Restore the previously trained churn model from its joblib dump.
model = joblib.load(filename='churn_prediction_model.pkl')
# Read the customer order data to be scored; point this at the real CSV location.
new_data = pd.read_csv(filepath_or_buffer='new_customer_data.csv')
# Preprocess the new data
def preprocess_new_data(new_df, feature_columns=None, current_date=None):
    """Aggregate raw order lines into one feature row per customer.

    Args:
        new_df: DataFrame with the columns 'ID Client', 'ID Commande',
            'Total', 'ID Article' and 'Date Commande' (dates formatted as
            YYYY-MM-DD). It is not modified.
        feature_columns: Optional sequence of column names. When given, the
            result is restricted to, and ordered by, these columns so that it
            lines up with the matrix used for training. When omitted, all
            computed feature columns are returned in the order they are built.
        current_date: Optional reference date for the "time since" features.
            Defaults to the current timestamp; pass a fixed value to make the
            output reproducible.

    Returns:
        DataFrame with one row per 'ID Client' group (in the order produced
        by ``groupby('ID Client')``), without the 'ID Client' and purchase
        date columns.

    Raises:
        KeyError: if a required input column is missing, or if
            ``feature_columns`` names a column that was not computed.
    """
    # Parse the dates on a copy so the caller's DataFrame is left untouched.
    orders = new_df.assign(**{
        'Date Commande': pd.to_datetime(new_df['Date Commande'], format="%Y-%m-%d"),
    })

    # Named aggregation yields the final column names directly, so no
    # MultiIndex flattening / renaming pass is needed afterwards.
    client_features_new = orders.groupby('ID Client').agg(**{
        'Total Orders': ('ID Commande', 'nunique'),
        'Average Order Value': ('Total', 'mean'),
        'Max Purchase Value': ('Total', 'max'),
        'Min Purchase Value': ('Total', 'min'),
        'Std Dev Order Value': ('Total', 'std'),
        'Unique Products Purchased': ('ID Article', 'nunique'),
        'First Purchase Date': ('Date Commande', 'min'),
        'Last Purchase Date': ('Date Commande', 'max'),
    }).reset_index()

    if current_date is None:
        current_date = pd.to_datetime('today')
    else:
        current_date = pd.to_datetime(current_date)

    client_features_new['Time Since First Purchase'] = (
        current_date - client_features_new['First Purchase Date']
    ).dt.days
    client_features_new['Time Since Last Purchase'] = (
        current_date - client_features_new['Last Purchase Date']
    ).dt.days
    client_features_new['Purchase Frequency'] = (
        client_features_new['Total Orders']
        / client_features_new['Time Since First Purchase']
    )

    # A first purchase made on the reference date divides by zero (inf), and a
    # customer with a single order has an undefined std (NaN); both become 0.
    client_features_new = client_features_new.replace(
        [np.inf, -np.inf], np.nan
    ).fillna(0)

    # NOTE(review): calculate_frequency_change is not defined in this file; it
    # must be imported from (or copied out of) the training code before this
    # function can run.
    client_features_new['Change in Purchase Frequency'] = client_features_new.apply(
        calculate_frequency_change, axis=1
    )

    # Identifier and raw date columns are not model inputs.
    features = client_features_new.drop(
        columns=['ID Client', 'First Purchase Date', 'Last Purchase Date']
    )

    # Align with the training layout when the caller supplies it.
    if feature_columns is not None:
        features = features[list(feature_columns)]
    return features
# Build the per-customer feature matrix from the raw orders loaded above.
prepared_new_data = preprocess_new_data(new_data)

# preprocess_new_data drops 'ID Client' from its result, so recover the IDs
# with the same groupby it uses; this keeps them row-aligned with the features.
client_ids = new_data.groupby('ID Client').size().index.to_numpy()

# Standardize with the scaler that was fitted during training.
# NOTE(review): 'scaler.pkl' is an assumed file name; point this at wherever
# the training run persisted its scaler.
scaler = joblib.load('scaler.pkl')

# When the scaler recorded its training column names, reuse them so the
# features are fed in exactly the order it was fitted on.
if hasattr(scaler, 'feature_names_in_'):
    prepared_new_data = prepared_new_data[list(scaler.feature_names_in_)]
prepared_new_data_scaled = scaler.transform(prepared_new_data)

# Predict with the model loaded at the top of the script.
predictions = model.predict(prepared_new_data_scaled)
# Column 1 of predict_proba is the probability of the second class in model.classes_.
probabilities = model.predict_proba(prepared_new_data_scaled)[:, 1]

# One output row per customer: identifier, predicted label, probability.
results = pd.DataFrame({
    'ID Client': client_ids,
    'Predicted Churn': predictions,
    'Churn Probability': probabilities,
})
print(results)
# Stray non-Python line (looks like a BI/DAX measure), kept for reference only:
# Total of Quantities = SUM(data_cleaned[Quantité])