-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
merge pull request #10 from mukeshmk/autosk-meta-feat
added data model for meta-features and kNN search algorithm
- Loading branch information
Showing 8 changed files with 169 additions and 11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from app import db, ma | ||
from constants import TABLE_META_FEATURES | ||
|
||
|
||
# Meta Feature Class/Model
class MetaFeature(db.Model):
    """ORM model holding one named meta-feature value linked to a metric row."""

    __tablename__ = TABLE_META_FEATURES

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Mandatory foreign key to the ``id`` column of the ``metric`` table.
    metric_id = db.Column(db.Integer, db.ForeignKey('metric.id'), nullable=False)
    # Name and value are both stored as strings of at most 200 characters.
    feat_name = db.Column(db.String(200))
    feat_value = db.Column(db.String(200))

    def __init__(self, metric_id, feat_name, feat_value):
        """Store the three caller-supplied column values; ``id`` is not set here."""
        self.metric_id, self.feat_name, self.feat_value = (
            metric_id,
            feat_name,
            feat_value,
        )
|
||
|
||
# Meta Feature Schema
class MetaFeatureSchema(ma.Schema):
    """Serialization schema listing the four MetaFeature columns."""

    class Meta:
        # Attributes included in the serialized output.
        fields = ('id', 'metric_id', 'feat_name', 'feat_value')
|
||
|
||
# Init schema
# One instance for a single object and one configured with many=True for collections.
meta_feature_schema = MetaFeatureSchema()
meta_features_schema = MetaFeatureSchema(many=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import pandas as pd | ||
import utils | ||
|
||
from sklearn.model_selection import train_test_split | ||
from sklearn.neighbors import KNeighborsRegressor | ||
|
||
def kmc():
    """Fit a 2-nearest-neighbour regressor on the stored metrics and print the results.

    The frame returned by ``utils.get_df_from_db`` is zero-filled, split into
    features and targets, encoded, and divided 80/20 (``random_state=123``).
    The held-out targets are printed first, followed by the model's predictions.
    """
    frame = utils.get_df_from_db()
    frame.fillna(0, inplace=True)

    X, y = utils.get_Xy(frame)

    # pre processing of data: one-hot encode the target type on the feature side,
    # label-encode the two categorical target columns.
    X, _ = utils.ohe_feature(X, utils.TARGET_TYPE)
    for column in (utils.ALGORITHM_NAME, utils.METRIC_NAME):
        y, _ = utils.label_encode_feature(y, column)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=123
    )

    regressor = KNeighborsRegressor(n_neighbors=2).fit(X_train, y_train)
    predictions = pd.DataFrame(regressor.predict(X_test))

    print(y_test.to_string(header=False))
    print(predictions.to_string(header=False))
|
||
# Run the experiment only when this file is executed directly as a script.
if __name__ == "__main__":
    kmc()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import pandas as pd | ||
|
||
from data_models.Metrics import Metric | ||
|
||
from sklearn import preprocessing as pp | ||
|
||
# Column labels used by the DataFrame helpers in this module.
TARGET_TYPE = 'Target Type'
ALGORITHM_NAME = 'Algorithm Name'
METRIC_NAME = 'Metric Name'
METRIC_VALUE = 'Metric Value'
|
||
def get_df_from_db():
    """Load every ``Metric`` row into a DataFrame, one DataFrame row per metric.

    Each metric contributes its target type, one float column per attached
    meta-feature (named by ``feat_name``), its algorithm name, metric name and
    metric value. Columns missing for a given metric are left as NaN by the
    concatenation.

    Returns:
        pandas.DataFrame: the combined frame after ``reset_index()``; the old
        per-row index is therefore kept as an ``index`` column, as before.
    """
    frames = []
    for metric in Metric.query.all():
        row = {}
        # setdefault(...).append(...) keeps the list-per-column layout, so a
        # repeated column name within one metric still surfaces as a
        # length-mismatch error from DataFrame.from_dict rather than being
        # silently overwritten.
        row.setdefault(TARGET_TYPE, []).append(str(metric.target_type))

        for mf in metric.meta_features:
            row.setdefault(mf.feat_name, []).append(float(mf.feat_value))

        row.setdefault(ALGORITHM_NAME, []).append(str(metric.algorithm_name))
        row.setdefault(METRIC_NAME, []).append(str(metric.metric_name))
        row.setdefault(METRIC_VALUE, []).append(float(metric.metric_value))

        frames.append(pd.DataFrame.from_dict(row))

    if not frames:
        # pd.concat rejects an empty list; mirror what the empty table used to yield.
        return pd.DataFrame().reset_index()

    # A single concat replaces the per-row DataFrame.append, which was removed
    # in pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(frames).reset_index()
|
||
|
||
def get_Xy(df):
    """Split *df* into a feature frame ``X`` and a three-column target frame ``y``.

    ``y`` holds the algorithm-name, metric-name and metric-value columns; ``X``
    holds every other column, in the order produced by ``Index.difference``.
    """
    target_columns = [ALGORITHM_NAME, METRIC_NAME, METRIC_VALUE]
    feature_columns = df.columns.difference(target_columns)
    return df[feature_columns], df[target_columns]
|
||
# One Hot Encoding
def ohe_feature(df, feature, drop_additional_feature=True):
    """Replace column *feature* of *df* with one-hot indicator columns.

    Args:
        df: DataFrame containing the column to encode.
        feature: Name of the categorical column.
        drop_additional_feature: When true, the last indicator column is
            dropped because it is redundant given the others.

    Returns:
        tuple: ``(encoded_df, encoder)`` where ``encoded_df`` is a new frame
        with the original column removed and columns named
        ``"<feature>: <category>"`` appended, and ``encoder`` is the fitted
        dense-output ``OneHotEncoder``.
    """
    try:
        encoder = pp.OneHotEncoder(categories='auto', sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only know the previous keyword name.
        encoder = pp.OneHotEncoder(categories='auto', sparse=False)

    data = encoder.fit_transform(df[feature].values.reshape(-1, 1))

    # Name columns from the fitted categories directly. Stripping characters
    # off the generated "x0_<category>" names also ate leading or trailing
    # 'x', '_', and digits 0-3 that belonged to the category itself.
    columns = [feature + ': ' + str(category) for category in encoder.categories_[0]]

    # Reuse df's index so the concat below aligns row-for-row even when df
    # does not carry a default RangeIndex.
    ohedf = pd.DataFrame(data, columns=columns, index=df.index)

    # to drop the extra column of redundant data
    if drop_additional_feature:
        ohedf = ohedf.iloc[:, :-1]

    # concat the ohe df with the original df, then drop the original column
    df = pd.concat([df, ohedf], axis=1)
    del df[feature]

    return df, encoder
|
||
# Label Encoding
def label_encode_feature(df, feature):
    """Replace column *feature* of *df* with integer labels.

    Args:
        df: DataFrame containing the column to encode. It is not modified.
        feature: Name of the categorical column.

    Returns:
        tuple: ``(encoded_df, encoder)`` where ``encoded_df`` is a new frame in
        which *feature* has been removed and re-added as the last column
        holding the encoded labels, and ``encoder`` is the fitted
        ``LabelEncoder``.
    """
    encoder = pp.LabelEncoder()
    # LabelEncoder expects a 1-D array; an (n, 1) column vector triggers a
    # DataConversionWarning.
    codes = encoder.fit_transform(df[feature].values)

    # drop() returns a new frame, so the caller's DataFrame is left untouched.
    df = df.drop(columns=[feature])
    # Positional assignment keeps the existing index, so rows cannot misalign,
    # and appends the encoded column at the end as the previous concat did.
    df[feature] = codes

    return df, encoder