Commit

add segmentation_v2 module

yxiaoli committed Oct 2, 2018
1 parent a973bee commit d4f67e1
Showing 13 changed files with 353 additions and 93 deletions.
101 changes: 101 additions & 0 deletions example/generate_Training_Dataset.py
@@ -0,0 +1,101 @@
from nptdms import TdmsFile
import npfeintool as npF
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac

def save_to_file():
    """
    Example workflow: extract punch segments from a TDMS recording,
    subdivide them, and save the results to CSV files.
    """
    # ============Step 1: Read in data============
    # Load the data, taking the TDMS data type as an example
    tdms_file = TdmsFile("/media/sherry/新加卷/ubuntu/WZL-2018/Feintool Daten/FW-1-1/new material/AKF_SS-FW2-H04521-H05000.tdms")
    df_all = tdms_file.object('Untitled').as_dataframe()
    df_force = df_all['Stempel_1 (Formula Result)']
    df_stroke = df_all['Position_Ma']

    # ============Step 2: Extract each punch shape============
    SEH = npF.SegHub()
    # Initialize the collection lists
    segmentation_1 = []
    segmentation_2 = []
    segmentation_3 = []
    segmentation_4 = []
    segmentation_5 = []
    sub_punch_1 = []
    sub_punch_2 = []
    sub_punch_3 = []
    sub_punch_4 = []
    # Extract all punches of the dataset
    df_punches = SEH.extract_hub(df_force, df_stroke, end=100000)
    df_punches = df_punches.reset_index(drop=True)

# ============Step 3: separate into subsegmentation============
x=0
for i in df_punches:
# first changepoint detection on whole punch
punch_seg = SEH.segment_and_plot(df_punches[i].dropna(), 'l2')
# second go further to get divide the fifth sequence
segmentation_1.append(np.asarray(punch_seg[0]))
segmentation_2.append(np.asarray(punch_seg[1]))
segmentation_3.append(np.asarray(punch_seg[2]))
segmentation_4.append(np.asarray(punch_seg[3]))
segmentation_5.append(np.asarray(punch_seg[4]))
sub_punch_seg = SEH.segment_and_plot(punch_seg[4].dropna(), 'rbf', 7 + i, 4)
# append to corresponding list
sub_punch_1.append(np.asarray(sub_punch_seg[0]))
sub_punch_2.append(np.asarray(sub_punch_seg[1]))
sub_punch_3.append(np.asarray(sub_punch_seg[2]))
sub_punch_4.append(np.asarray(sub_punch_seg[3]))
#sub_segmentation.append(sub_punch_seg)
x = 1+x

    # Save everything to files as a safeguard; the full fifth segment is kept
    # as "segmentation_4(1).csv", and its four sub-segments become files 4-7
    pd.DataFrame(segmentation_1).to_csv("segmentation_0.csv", index=False)
    pd.DataFrame(segmentation_2).to_csv("segmentation_1.csv", index=False)
    pd.DataFrame(segmentation_3).to_csv("segmentation_2.csv", index=False)
    pd.DataFrame(segmentation_4).to_csv("segmentation_3.csv", index=False)
    pd.DataFrame(segmentation_5).to_csv("segmentation_4(1).csv", index=False)
    pd.DataFrame(sub_punch_1).to_csv("segmentation_4.csv", index=False)
    pd.DataFrame(sub_punch_2).to_csv("segmentation_5.csv", index=False)
    pd.DataFrame(sub_punch_3).to_csv("segmentation_6.csv", index=False)
    pd.DataFrame(sub_punch_4).to_csv("segmentation_7.csv", index=False)


def read_from_file():
    """Load the eight saved segment files back into DataFrames."""
    segmentations = [[] for _ in range(8)]
    for i in range(8):
        segmentations[i] = pd.read_csv("segmentation_" + str(i) + ".csv")

    return segmentations

def save_models(segmentations):
    SEH = npF.SegHub()
    data_seg = [[] for _ in range(8)]
    for i in range(8):
        # Bring all sequences of a segment to a uniform length, then cluster
        data_seg[i] = SEH.Uniformation(segmentations[i])
        z = hac.linkage(data_seg[i], 'ward')
        result = SEH.print_clusters(data_seg[i], z, 3, False)
        pd.DataFrame(result).to_csv("cluster_" + str(i) + ".csv", index=False)

def main():
    # Save the processed sequences to file as a safeguard against failures
    save_to_file()
    segmentations = read_from_file()
    # Uniformation + clustering + save to file
    save_models(segmentations)

if __name__ == "__main__":
    main()
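For reference, the Ward-linkage clustering that save_models delegates to SegHub.print_clusters follows the standard SciPy pattern. Below is a minimal, self-contained sketch of that pattern on made-up toy data, using fcluster in place of the library's own helper (whose internals are not part of this diff):

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac
from scipy.cluster.hierarchy import fcluster

# Toy stand-in for one uniformed segment table: six sequences, five samples each
rng = np.random.RandomState(0)
data = np.vstack([rng.normal(loc, 0.1, size=(3, 5)) for loc in (0.0, 5.0)])

# Ward linkage over the rows, then cut the dendrogram into three flat clusters
z = hac.linkage(data, 'ward')
labels = fcluster(z, t=3, criterion='maxclust')

# Persist the labels the same way save_models() writes its cluster files
pd.DataFrame(labels).to_csv("cluster_demo.csv", index=False)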
79 changes: 0 additions & 79 deletions example/test_for_paper.py

This file was deleted.

87 changes: 87 additions & 0 deletions example/testing_newcoming_data.py
@@ -0,0 +1,87 @@
from nptdms import TdmsFile
import npfeintool as npF
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac

def main():
    # This script tests newly arriving data and feeds the results
    # back into the training set.
    # =========Step 1: Read in data============
    # Load the data, taking the TDMS data type as an example
    tdms_file = TdmsFile("/media/sherry/新加卷/ubuntu/WZL-2018/Feintool Daten/FW-1-1/new material/AKF_SS-FW2-H04521-H05000.tdms")
    df_all = tdms_file.object('Untitled').as_dataframe()
    df_force = df_all['Stempel_1 (Formula Result)']
    df_stroke = df_all['Position_Ma']
    # Sample a window of the time series
    df_f = df_force[80800:99000].reset_index(drop=True)
    df_s = df_stroke[80800:99000].reset_index(drop=True)

    # Read in the training data
    segmentations = read_from_file()

    # =========Step 2: Extract the hub===========
    # Extract all punches of the dataset
    SEH = npF.SegHub()
    df_punches_t = SEH.extract_hub(df_f, df_s)
    df_punches_t = df_punches_t.reset_index(drop=True)

    # =========Step 3: Segmentation into trends=========
    # Replace the coarse fifth trend with its four finer sub-trends
    punch_seg = SEH.segment_and_plot(df_punches_t[0].dropna(), 'l2')
    sub_punch_seg = SEH.segment_and_plot(punch_seg[4].dropna(), 'rbf', 0, 4)
    punch_seg[4] = sub_punch_seg[0]
    punch_seg[5] = sub_punch_seg[1]
    punch_seg[6] = sub_punch_seg[2]
    punch_seg[7] = sub_punch_seg[3]


    # =========Step 4: Classification=========
    for i in range(8):
        print("Trend: " + str(i + 1))
        s = SEH.Uniformation(punch_seg[i])
        clusters = pd.read_csv("cluster_" + str(i) + ".csv")
        data_train = SEH.Uniformation(segmentations[i])
        row, col = data_train.shape
        # Trim the new sample and the training set to a common length
        col = min(len(s), col)
        s = s[0:col]
        test = pd.DataFrame([s, s])
        data_train = data_train.iloc[:, 0:col]
        # Generate new clusters and save them to file.
        # Note: plain assignment only copies a reference to a table;
        # use .copy() to get an independent one.
        new_dataset = data_train.copy()
        new_dataset.loc[row] = s.values
        z = hac.linkage(new_dataset, 'ward')
        # Cluster the same table the linkage was computed on
        result = SEH.print_clusters(new_dataset, z, 3, plot=False)
        pd.DataFrame(result).to_csv("cluster_" + str(i) + ".csv", index=False)

        print("Result:.........")
        SEH.classifier(data_train, clusters, test, 3)

    # ==========Step 5: Save the newly added data==========
    save_newdata(punch_seg)

def save_newdata(punch_seg):
    """Append each newly processed trend as one row of its segmentation file."""
    for i in range(8):
        path = "segmentation_" + str(i) + ".csv"
        trend = np.asarray(punch_seg[i]).ravel()
        # fd.write() cannot serialize an array; append one CSV row via pandas
        pd.DataFrame([trend]).to_csv(path, mode='a', header=False, index=False)
def read_from_file():
    """Load the eight saved segment files back into DataFrames."""
    segmentations = [[] for _ in range(8)]
    for i in range(8):
        segmentations[i] = pd.read_csv("segmentation_" + str(i) + ".csv")

    return segmentations

if __name__ == "__main__":
    main()
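The SegHub.classifier implementation is not part of this diff, but the original imports (KNeighborsClassifier, scipy.stats.mode) suggest a k-nearest-neighbour vote against the clustered training set. A minimal sketch of that idea, on hypothetical toy data, might look like this:

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

# Hypothetical stand-ins: six training sequences with flat-cluster labels 1..3
train = pd.DataFrame(np.random.RandomState(1).rand(6, 10))
labels = np.array([1, 1, 2, 2, 3, 3])

# Fit k-NN on the clustered training set, then label an incoming sequence
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train.values, labels)

new_seq = np.random.RandomState(2).rand(1, 10)
print("predicted cluster:", knn.predict(new_seq))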
4 changes: 2 additions & 2 deletions npfeintool/PFA.py
@@ -3,7 +3,7 @@
The PFA method is used to find the most significant features
"""
from nptdms import TdmsFile
from sklearn import preprocessing
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
@@ -17,7 +17,7 @@ class PFA(object):
    def __init__(self, n_features, q=None):
        self.q = q
        self.n_features = n_features

    def fit(self, X):
        if not self.q:
            self.q = X.shape[1]
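Only formatting changes land in PFA.py here, but for context: principal feature analysis typically projects the data with PCA, clusters the rows of the component matrix with KMeans, and keeps the features nearest each cluster centre. A self-contained sketch of that general technique (an assumption about the idea, not this class's exact truncated logic):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

def pfa_select(X, n_features, q=None):
    """Pick n_features representative columns of X via PCA + KMeans."""
    q = q or X.shape[1]
    pca = PCA(n_components=q).fit(X)
    A_q = pca.components_.T                # one row per original feature
    km = KMeans(n_clusters=n_features, n_init=10).fit(A_q)
    # For each cluster, keep the feature whose row lies closest to the centre
    dists = km.transform(A_q)
    return [int(np.argmin(dists[:, k])) for k in range(n_features)]

X = np.random.RandomState(0).rand(50, 8)
print(pfa_select(X, n_features=3))         # indices of the 3 kept features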
2 changes: 1 addition & 1 deletion npfeintool/__init__.py
@@ -1,6 +1,6 @@
from .filterVar import *
from .segmentation import *

from .segmentation_v2 import SegHub
"""
The __init__.py file is usually empty, but if you remove the __init__.py file, Python will no longer look for submodules inside that directory.
"""
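The segmentation_v2 source itself is not rendered in this view, but the 'l2' and 'rbf' model names passed to SegHub.segment_and_plot in the example scripts match the ruptures change-point library that those scripts originally imported. As an assumption about what the module wraps, a minimal change-point detection sketch with ruptures:

import numpy as np
import ruptures as rpt

# Synthetic piecewise-constant signal with two change points
signal = np.concatenate([np.zeros(50), 3.0 * np.ones(50), np.ones(50)])

# Binary segmentation with an L2 cost, mirroring the scripts' 'l2' mode
algo = rpt.Binseg(model="l2").fit(signal)
print(algo.predict(n_bkps=2))   # breakpoint indices, e.g. [50, 100, 150]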
Binary file added npfeintool/__init__.pyc
Binary file modified npfeintool/__pycache__/__init__.cpython-35.pyc
6 changes: 3 additions & 3 deletions npfeintool/filterVar.py
@@ -17,11 +17,11 @@ def kill_all_x(x, df=None):
return "the data frame is empty."
num_attr = df.columns.size
dropall = []
for i in range(0,num_attr):
for i in range(0, num_attr):
index = df.columns[i]
if(all(df[index]==x)):
if all(df[index] == x):
dropall.append(df.columns[i])
df.drop(dropall, axis = 1, inplace= True)
df.drop(dropall, axis=1, inplace=True)
return df

def keep_x_var(x, df=None):
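A quick usage sketch of kill_all_x, assuming the function is re-exported at package level via the star import in __init__.py: it drops, in place, every column whose values all equal x.

import pandas as pd
from npfeintool import kill_all_x

df = pd.DataFrame({"a": [0, 0, 0], "b": [1, 2, 3]})
# Column "a" is constantly 0, so it gets dropped
print(kill_all_x(0, df).columns.tolist())   # ['b']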
Binary file added npfeintool/filterVar.pyc
8 changes: 4 additions & 4 deletions npfeintool/segmentation.py
@@ -15,7 +15,7 @@ def SW_seg(df_power, width):
     df_seg = []
     size_df = df_power.size
     anchor = 1
-    while((anchor+width*3) < size_df):
+    while ((anchor+width*3) < size_df):
         mid = df_power[anchor+width: anchor+width*3].idxmax()
         seg = df_power[mid-width: mid+width]
         df_seg.append(seg.values)
@@ -37,9 +37,9 @@ def Trend_Detection(s_fr, delta, win_size):
     i = 0
     j = i;

-    while (i<len_s):
-        k = Trend_Change(s_fr[j:i+win_size],delta)
-        if (k != -1):
+    while (i < len_s):
+        k = Trend_Change(s_fr [j : i+win_size], delta)
+        if(k != -1):
             df[j:j+k].plot()
             df[j+k+1:i+win_size].plot()
             j = i+win_size
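SW_seg slides a window along the force signal, centres on a local maximum, and collects fixed-width slices around it. A small synthetic usage sketch, assuming the function is importable from the module as shown (the loop's tail is not rendered in this view):

import numpy as np
import pandas as pd
from npfeintool.segmentation import SW_seg

# Synthetic signal: punch-like peaks repeating every 100 samples
t = np.arange(300)
signal = pd.Series(np.clip(np.sin(2 * np.pi * t / 100), 0, None))

# width=20 -> each extracted segment is 40 samples centred on a peak
segments = SW_seg(signal, width=20)
print(len(segments))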
