Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tatiana belyukina #11

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
20c532b
Add files via upload
TatianaBelyukina Jan 29, 2021
29c1c1c
Add files via upload
TatianaBelyukina Jan 29, 2021
0341eec
Create task_2_HousePrices
TatianaBelyukina Feb 1, 2021
3b8343d
Delete task_2_HousePrices
TatianaBelyukina Feb 1, 2021
ab38cd5
Create empty
TatianaBelyukina Feb 1, 2021
bb6070e
Rename main.ipynb to TatianaBelyukina_code/task1/main.ipynb
TatianaBelyukina Feb 1, 2021
d65dd00
Delete empty
TatianaBelyukina Feb 1, 2021
ffe1d62
Rename requirements.txt to TatianaBelyukina_code/task1/requirements.txt
TatianaBelyukina Feb 1, 2021
e8c4b50
Rename titanic.csv to TatianaBelyukina_code/task1/titanic.csv
TatianaBelyukina Feb 1, 2021
cd8cf64
Create empty
TatianaBelyukina Feb 1, 2021
2926024
Add files via upload
TatianaBelyukina Feb 1, 2021
c9db45f
Delete empty
TatianaBelyukina Feb 1, 2021
c442562
Delete TatianaBelyukia_code/task2 directory
TatianaBelyukina Feb 1, 2021
4bcc963
Create empty
TatianaBelyukina Feb 1, 2021
8b0afcf
Add files via upload
TatianaBelyukina Feb 1, 2021
fa33cc0
Delete empty
TatianaBelyukina Feb 1, 2021
7097002
Add files via upload
TatianaBelyukina Feb 1, 2021
721df64
Add files via upload
TatianaBelyukina Feb 1, 2021
0df6627
Add files via upload
TatianaBelyukina Feb 1, 2021
57d7852
Add files via upload
TatianaBelyukina Feb 1, 2021
ed01ce0
Add files via upload
TatianaBelyukina Feb 1, 2021
35bb4fe
Add files via upload
TatianaBelyukina Feb 1, 2021
46a1859
Create empty
TatianaBelyukina Feb 3, 2021
95fbda4
Add files via upload
TatianaBelyukina Feb 3, 2021
eed2580
Add files via upload
TatianaBelyukina Feb 3, 2021
ac91304
Delete empty
TatianaBelyukina Feb 3, 2021
befcf7c
Add files via upload
TatianaBelyukina Feb 3, 2021
beb4d08
Rename TatianaBelyukina_code/task1/main.py to TatianaBelyukina_code/t…
TatianaBelyukina Feb 3, 2021
aca05a2
Rename TatianaBelyukina_code/task1/requirements.txt to TatianaBelyuki…
TatianaBelyukina Feb 3, 2021
dd90d68
Rename TatianaBelyukina_code/task1/main.ipynb to TatianaBelyukina_cod…
TatianaBelyukina Feb 3, 2021
59b48f6
Rename TatianaBelyukina_code/task1/titanic.csv to TatianaBelyukina_co…
TatianaBelyukina Feb 3, 2021
d5b1564
Create main.py
TatianaBelyukina Feb 9, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
951 changes: 951 additions & 0 deletions TatianaBelyukina_code/task1_Titanic/main.ipynb

Large diffs are not rendered by default.

171 changes: 171 additions & 0 deletions TatianaBelyukina_code/task1_Titanic/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Exploratory pass over the training data. The relative path means the script
# has to be started from the directory that holds train.csv.
titanic = pd.read_csv('./train.csv')
# A bare `titanic.head(10)` only renders inside a notebook; print it so the
# preview is also visible when this file runs as a plain script.
print(titanic.head(10))

print(titanic.shape)
print(titanic.describe())
print(titanic['Survived'].value_counts())
print(titanic.groupby('Sex')[['Survived']].mean())

# Survived aggregated per Sex x Pclass cell; built once, then printed and plotted.
survival_by_sex_class = titanic.pivot_table('Survived', index='Sex', columns='Pclass')
print(survival_by_sex_class)
survival_by_sex_class.plot()
plt.show()

# Same table with an extra row level: Age cut at the bin edges 0, 18 and 80.
age = pd.cut(titanic['Age'], [0, 18, 80])
print(titanic.pivot_table('Survived', ['Sex', age], 'Pclass'))

# Missing-value count for every column.
print(titanic.isna().sum())


def splitColumn(data, feature):
    """Replace a categorical column with one 0/1 indicator column per value.

    For every distinct non-missing value ``v`` of ``data[feature]`` a column
    named ``feature + "_" + str(v)`` is appended, holding 1 where the row
    equals ``v`` and 0 elsewhere. Rows whose value is missing get 0 in every
    indicator column.

    Args:
        data: pandas DataFrame containing ``feature``. It is not modified.
        feature: Name of the column to encode.

    Returns:
        A new DataFrame without ``feature`` and with the indicator columns
        appended in sorted value order.
    """
    column = data[feature]

    # Missing entries get no indicator column of their own.
    categories = [
        value
        for value in column.unique().tolist()
        if not pd.isna(value) and str(value) != "nan"
    ]
    # Sort so the generated columns come out in the same order on every run
    # and for every frame; iterating a set gave an arbitrary order.
    try:
        categories.sort()
    except TypeError:
        # Values of mutually unorderable types: order them by their label.
        categories.sort(key=str)

    result = data.drop([feature], axis=1)
    for value in categories:
        # Whole-column comparison: independent of the index labels, unlike
        # looking rows up one at a time with data[feature][i].
        result[feature + "_" + str(value)] = (
            (column == value).astype("int64").to_numpy()
        )
    return result


def splitFloatColumn(data, feature, diapasones):
    """Replace a numeric column with 0/1 indicator columns, one per range.

    ``diapasones`` holds ascending cut points ``c0 < c1 < ... < ck``. They
    define ``k + 2`` half-open ranges: ``x < c0``, ``c0 <= x < c1``, ...,
    ``x >= ck``. Range ``i`` becomes the column ``feature + "_" + str(i)``.
    A missing value fails every comparison, so such a row gets 0 in every
    range column (with an empty ``diapasones`` there is a single unbounded
    range and every row gets 1).

    Args:
        data: pandas DataFrame containing ``feature``. It is not modified.
        feature: Name of the numeric column to bin.
        diapasones: Sequence of cut points, in ascending order.

    Returns:
        A new DataFrame without ``feature`` and with the range columns
        appended in range order.
    """
    column = data[feature]
    result = data.drop([feature], axis=1)
    last = len(diapasones)

    for i in range(last + 1):
        # Start from "every row qualifies" and narrow by whichever bounds
        # exist; the first range has no lower bound, the last no upper bound.
        in_range = pd.Series(True, index=column.index)
        if i > 0:
            in_range &= column >= diapasones[i - 1]
        if i < last:
            in_range &= column < diapasones[i]
        # Assign positionally so the result does not depend on index labels.
        result[feature + "_" + str(i)] = in_range.astype("int64").to_numpy()
    return result


def _impute_missing_age(data):
    """Fill missing ``Age`` cells of ``data`` in place from the ``Name`` text."""
    missing = data["Age"].isna()
    if not missing.any():
        return
    names = data["Name"]

    def mentions(token):
        # Plain substring test, like the `in` operator; "Don." is not a regex.
        return names.str.contains(token, regex=False, na=False)

    # The first matching rule wins, as in an if/elif chain. "Mr" also matches
    # every name containing "Mrs", so no separate "Mrs" rule is needed.
    guessed = np.select(
        [mentions("Miss") | mentions("Ms"), mentions("Mr"), mentions("Don.")],
        [15, 30, 40],
        default=10,
    )
    # A single .loc write instead of chained data["Age"][i] = ... assignments,
    # which are unreliable (and silently do nothing under Copy-on-Write).
    data.loc[missing, "Age"] = guessed[missing.to_numpy()]


def preprocess(data):
    """Turn a raw passenger frame into an all-numeric indicator matrix.

    Steps:
      1. Fill missing ``Age`` from the title in ``Name``: Miss/Ms -> 15,
         Mr (which also covers Mrs) -> 30, "Don." -> 40, anything else -> 10.
      2. Drop ``PassengerId``, ``Ticket``, ``Name`` and ``Cabin``.
      3. One-hot encode ``Embarked``, ``Sex`` and ``Pclass``.
      4. Bin ``Parch``, ``SibSp``, ``Age`` and ``Fare`` into range indicators.
      5. Replace any remaining missing value with 0.

    The intermediate frame, every final column name and the final frame are
    printed along the way.

    Args:
        data: pandas DataFrame holding all of the columns named above. The
            caller's frame is left untouched; work happens on a copy.

    Returns:
        The encoded DataFrame. Columns not listed above are passed through
        unchanged and keep their position ahead of the generated columns.
    """
    data = data.copy()
    _impute_missing_age(data)

    data = data.drop(['PassengerId', 'Ticket', 'Name', 'Cabin'], axis=1)
    print(data)
    data = splitColumn(data, "Embarked")
    data = splitColumn(data, "Sex")
    data = splitColumn(data, "Pclass")
    data = splitFloatColumn(data, "Parch", [1, 2, 4])
    data = splitFloatColumn(data, "SibSp", [1, 2, 4])
    data = splitFloatColumn(data, "Age", [2, 5, 10, 16, 20, 28, 35, 45])
    data = splitFloatColumn(data, "Fare", [7, 9, 10, 15, 30, 60, 80])

    for feature in data:
        print(feature)
    # One vectorised pass instead of testing and patching every cell.
    data = data.fillna(0)
    print(data)
    return data


def split_data(data):
    """Separate a frame into a feature matrix and a target vector.

    Column 0 is taken as the target; every later column is a feature.

    Returns:
        A ``(features, target)`` tuple of NumPy arrays.
    """
    target = data.iloc[:, 0]
    features = data.iloc[:, 1:]
    return features.values, target.values


# Replace the raw training frame with its encoded form from preprocess().
titanic = preprocess(titanic)
print(titanic.dtypes)

# split_data() takes column 0 as the target and the remaining columns as features.
X, Y = split_data(titanic)
# Hold out 20% of the rows for evaluation; random_state=0 makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)


def check_model(x_train, y_train, x_test, y_test, model):
    """Print ``model.score`` on the training split, then on the test split.

    Each score goes on its own labelled line and a blank line follows, so
    consecutive reports stay visually separated. Nothing is returned.
    """
    splits = (
        ("traindata:", x_train, y_train),
        ("testdata:", x_test, y_test),
    )
    for label, features, target in splits:
        print(label, model.score(features, target))
    print()


def models(x_train, y_train, x_test, y_test):
    """Fit a set of scikit-learn classifiers and report each one's scores.

    All classifiers are fitted on the training split first; afterwards each
    one's label is printed and ``check_model`` reports its train and test
    scores.

    Returns:
        The fitted RBF-kernel ``SVC``.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier

    # Insertion order is both the fitting order and the reporting order.
    candidates = {
        "tree": DecisionTreeClassifier(criterion="entropy"),
        "logReg": LogisticRegression(max_iter=1000000),
        "neighbors": KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2),
        "forest": RandomForestClassifier(n_estimators=10, criterion="entropy"),
        "gauss": GaussianNB(),
        "svc_linear": SVC(kernel="linear"),
        "svc_rbf": SVC(kernel="rbf"),
    }

    for estimator in candidates.values():
        estimator.fit(x_train, y_train)

    for label, estimator in candidates.items():
        print(label)
        check_model(x_train, y_train, x_test, y_test, estimator)

    return candidates["svc_rbf"]


# Train and score every candidate classifier; models() returns the RBF-kernel SVC.
model = models(X_train, Y_train, X_test, Y_test)


def generateAns(model):
    """Predict on ``test.csv`` and write the result to ``./titanic.csv``.

    The output file has two columns, ``PassengerId`` and ``Survived``. The
    raw predictions and their integer form are printed on the way.

    Args:
        model: Fitted estimator exposing ``predict``; it must have been
            trained on features laid out the way ``preprocess`` produces
            them.
    """
    # Read the file once; the ids are taken from the raw frame because
    # preprocess() drops PassengerId from its result.
    raw = pd.read_csv("test.csv")
    test_ids = raw["PassengerId"]

    test = preprocess(raw)
    if "Survived" in test.columns:
        # Labelled data: column 0 is the target, exactly as during training.
        x, _ = split_data(test)
    else:
        # No target column (presumably the usual case for a held-out test
        # file - confirm against the actual test.csv). Every column is a
        # feature, so cutting off column 0 would remove a real feature and
        # leave the model one feature short.
        x = test.values

    Y_pred = model.predict(x)
    print(Y_pred)
    Y_pred = Y_pred.astype(int)
    print(Y_pred)

    submission = pd.DataFrame({
        "PassengerId": test_ids,
        "Survived": Y_pred
    })
    submission.to_csv('./titanic.csv', index=False)
    print('Exported!')


# Predict on test.csv with the selected model and write ./titanic.csv.
generateAns(model)
File renamed without changes.
Loading