-
Notifications
You must be signed in to change notification settings - Fork 0
/
modelGenerator.py
88 lines (71 loc) · 3.41 KB
/
modelGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sqlite3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Retrieve data from SQLite database.
# Reads the age, cholesterol and blood_pressure columns of every row in the
# `datas` table into `data` (a list of row tuples) and echoes each row.
connection = sqlite3.connect("./patients.db")
try:
    cursor = connection.cursor()
    cursor.execute("SELECT age, cholesterol, blood_pressure FROM datas")
    data = cursor.fetchall()
finally:
    # Always release the database handle, even when the query fails.
    connection.close()
for row in data:
    print(row)
# Convert data to a pandas DataFrame for easier manipulation
columns = ['age', 'cholesterol', 'blood_pressure']
df = pd.DataFrame(data, columns=columns)
# Extract systolic and diastolic pressure from the 'blood_pressure' column.
# The split result is a two-column DataFrame; it has to be assigned to a list
# of column names. Tuple-unpacking it would iterate over its column labels
# (0 and 1) and store those constants instead of the readings.
blood_pressure_parts = df['blood_pressure'].str.split("/", expand=True)
df[['systolic', 'diastolic']] = blood_pressure_parts.astype(float)
# Organize data into features and labels
label_col = 'systolic'  # You can choose 'systolic' or 'diastolic' as the target variable
# The target must never be one of its own inputs: with the label among the
# features the regression just learns the identity and the error is meaningless.
features_cols = [
    col for col in ['age', 'cholesterol', 'systolic', 'diastolic']
    if col != label_col
]
X = df[features_cols]
y = df[label_col]
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Feature Scaling: fit on the training split only, then reuse the same
# transformation for the test split and for new samples.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Model Selection and Training (Linear Regression example)
model = LinearRegression()
model.fit(X_train_scaled, y_train)
# Model Evaluation
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
# Prediction for new data
# Assuming you have new data
new_age = 40
new_cholesterol = 200
# Creating a new data point with the provided values; the blood pressure
# reading uses the same "systolic/diastolic" text format as the database.
new_data_point = [(new_age, new_cholesterol, '183/62')]
# Creating a DataFrame with column names
new_data_point_df = pd.DataFrame(new_data_point, columns=['age', 'cholesterol', 'systolic_diastolic'])
print(new_data_point_df.dtypes)
print(new_data_point_df.head())  # This will print the first few rows of the DataFrame
# Extracting 'systolic' and 'diastolic' values as floats
new_data_point_df[['systolic', 'diastolic']] = (
    new_data_point_df['systolic_diastolic'].str.split('/', expand=True).astype(float)
)
# Dropping the temporary 'systolic_diastolic' column
new_data_point_df = new_data_point_df.drop(columns=['systolic_diastolic'])
print("After splitting:")
print(new_data_point_df.dtypes)  # This will print the data types of each column
print(new_data_point_df.head())
# Scaling the new data point using the previously fitted scaler; selecting
# by features_cols keeps the columns identical to the ones used for training.
new_data_point_scaled = scaler.transform(new_data_point_df[features_cols])
print("Scaled new data point:")
print(new_data_point_scaled)
# Making predictions using the trained model
predicted_blood_pressure = model.predict(new_data_point_scaled)
# Printing the predicted value of the chosen target column
print(f"Predicted {label_col.capitalize()} Blood Pressure:", predicted_blood_pressure)