Commit

add segmentation_v2 module

yxiaoli committed Oct 2, 2018
1 parent a973bee commit d4f67e1
Showing 13 changed files with 353 additions and 93 deletions.
101 changes: 101 additions & 0 deletions example/generate_Training_Dataset.py
@@ -0,0 +1,101 @@
from nptdms import TdmsFile
import npfeintool as npF
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac

def save_to_file():
    """
    Example workflow: extract punch segments from a TDMS recording,
    subdivide them, and save the results to CSV files.
    """
    # ============Step 1: Read in data============
    # Load the data, taking the TDMS data type as an example
    tdms_file = TdmsFile("/media/sherry/新加卷/ubuntu/WZL-2018/Feintool Daten/FW-1-1/new material/AKF_SS-FW2-H04521-H05000.tdms")
    df_all = tdms_file.object('Untitled').as_dataframe()
    df_force = df_all['Stempel_1 (Formula Result)']
    df_stroke = df_all['Position_Ma']

    # ============Step 2: Extract each punch shape============
    SEH = npF.SegHub()
    # Initialize the collection lists
    segmentation_1 = []
    segmentation_2 = []
    segmentation_3 = []
    segmentation_4 = []
    segmentation_5 = []
    sub_punch_1 = []
    sub_punch_2 = []
    sub_punch_3 = []
    sub_punch_4 = []
    # Extract all punches of the dataset
    df_punches = SEH.extract_hub(df_force, df_stroke, end=100000)
    df_punches = df_punches.reset_index(drop=True)

# ============Step 3: separate into subsegmentation============
x=0
for i in df_punches:
# first changepoint detection on whole punch
punch_seg = SEH.segment_and_plot(df_punches[i].dropna(), 'l2')
# second go further to get divide the fifth sequence
segmentation_1.append(np.asarray(punch_seg[0]))
segmentation_2.append(np.asarray(punch_seg[1]))
segmentation_3.append(np.asarray(punch_seg[2]))
segmentation_4.append(np.asarray(punch_seg[3]))
segmentation_5.append(np.asarray(punch_seg[4]))
sub_punch_seg = SEH.segment_and_plot(punch_seg[4].dropna(), 'rbf', 7 + i, 4)
# append to corresponding list
sub_punch_1.append(np.asarray(sub_punch_seg[0]))
sub_punch_2.append(np.asarray(sub_punch_seg[1]))
sub_punch_3.append(np.asarray(sub_punch_seg[2]))
sub_punch_4.append(np.asarray(sub_punch_seg[3]))
#sub_segmentation.append(sub_punch_seg)
x = 1+x

    # Save everything to files as a safeguard; the full fifth segment is kept
    # as "segmentation_4(1).csv", and its four sub-segments become files 4-7
    pd.DataFrame(segmentation_1).to_csv("segmentation_0.csv", index=False)
    pd.DataFrame(segmentation_2).to_csv("segmentation_1.csv", index=False)
    pd.DataFrame(segmentation_3).to_csv("segmentation_2.csv", index=False)
    pd.DataFrame(segmentation_4).to_csv("segmentation_3.csv", index=False)
    pd.DataFrame(segmentation_5).to_csv("segmentation_4(1).csv", index=False)
    pd.DataFrame(sub_punch_1).to_csv("segmentation_4.csv", index=False)
    pd.DataFrame(sub_punch_2).to_csv("segmentation_5.csv", index=False)
    pd.DataFrame(sub_punch_3).to_csv("segmentation_6.csv", index=False)
    pd.DataFrame(sub_punch_4).to_csv("segmentation_7.csv", index=False)


def read_from_file():
    """Load the eight saved segment files back into DataFrames."""
    segmentations = [[] for _ in range(8)]
    for i in range(8):
        segmentations[i] = pd.read_csv("segmentation_" + str(i) + ".csv")

    return segmentations

def save_models(segmentations):
    SEH = npF.SegHub()
    data_seg = [[] for _ in range(8)]
    for i in range(8):
        # Bring all sequences of a segment to a uniform length, then cluster
        data_seg[i] = SEH.Uniformation(segmentations[i])
        z = hac.linkage(data_seg[i], 'ward')
        result = SEH.print_clusters(data_seg[i], z, 3, False)
        pd.DataFrame(result).to_csv("cluster_" + str(i) + ".csv", index=False)

def main():
    # Save the processed sequences to file as a safeguard against failures
    save_to_file()
    segmentations = read_from_file()
    # Uniformation + clustering + save to file
    save_models(segmentations)

if __name__ == "__main__":
    main()
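For reference, the Ward-linkage clustering that save_models delegates to SegHub.print_clusters follows the standard SciPy pattern. Below is a minimal, self-contained sketch of that pattern on made-up toy data, using fcluster in place of the library's own helper (whose internals are not part of this diff):

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac
from scipy.cluster.hierarchy import fcluster

# Toy stand-in for one uniformed segment table: six sequences, five samples each
rng = np.random.RandomState(0)
data = np.vstack([rng.normal(loc, 0.1, size=(3, 5)) for loc in (0.0, 5.0)])

# Ward linkage over the rows, then cut the dendrogram into three flat clusters
z = hac.linkage(data, 'ward')
labels = fcluster(z, t=3, criterion='maxclust')

# Persist the labels the same way save_models() writes its cluster files
pd.DataFrame(labels).to_csv("cluster_demo.csv", index=False)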
79 changes: 0 additions & 79 deletions example/test_for_paper.py

This file was deleted.

87 changes: 87 additions & 0 deletions example/testing_newcoming_data.py
@@ -0,0 +1,87 @@
from nptdms import TdmsFile
import npfeintool as npF
import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as hac

def main():
    # This script tests newly arriving data and feeds the results
    # back into the training set.
    # =========Step 1: Read in data============
    # Load the data, taking the TDMS data type as an example
    tdms_file = TdmsFile("/media/sherry/新加卷/ubuntu/WZL-2018/Feintool Daten/FW-1-1/new material/AKF_SS-FW2-H04521-H05000.tdms")
    df_all = tdms_file.object('Untitled').as_dataframe()
    df_force = df_all['Stempel_1 (Formula Result)']
    df_stroke = df_all['Position_Ma']
    # Sample a window of the time series
    df_f = df_force[80800:99000].reset_index(drop=True)
    df_s = df_stroke[80800:99000].reset_index(drop=True)

    # Read in the training data
    segmentations = read_from_file()

    # =========Step 2: Extract the hub===========
    # Extract all punches of the dataset
    SEH = npF.SegHub()
    df_punches_t = SEH.extract_hub(df_f, df_s)
    df_punches_t = df_punches_t.reset_index(drop=True)

    # =========Step 3: Segmentation into trends=========
    # Replace the coarse fifth trend with its four finer sub-trends
    punch_seg = SEH.segment_and_plot(df_punches_t[0].dropna(), 'l2')
    sub_punch_seg = SEH.segment_and_plot(punch_seg[4].dropna(), 'rbf', 0, 4)
    punch_seg[4] = sub_punch_seg[0]
    punch_seg[5] = sub_punch_seg[1]
    punch_seg[6] = sub_punch_seg[2]
    punch_seg[7] = sub_punch_seg[3]


    # =========Step 4: Classification=========
    for i in range(8):
        print("Trend: " + str(i + 1))
        s = SEH.Uniformation(punch_seg[i])
        clusters = pd.read_csv("cluster_" + str(i) + ".csv")
        data_train = SEH.Uniformation(segmentations[i])
        row, col = data_train.shape
        # Trim the new sample and the training set to a common length
        col = min(len(s), col)
        s = s[0:col]
        test = pd.DataFrame([s, s])
        data_train = data_train.iloc[:, 0:col]
        # Generate new clusters and save them to file.
        # Note: plain assignment only copies a reference to a table;
        # use .copy() to get an independent one.
        new_dataset = data_train.copy()
        new_dataset.loc[row] = s.values
        z = hac.linkage(new_dataset, 'ward')
        # Cluster the same table the linkage was computed on
        result = SEH.print_clusters(new_dataset, z, 3, plot=False)
        pd.DataFrame(result).to_csv("cluster_" + str(i) + ".csv", index=False)

        print("Result:.........")
        SEH.classifier(data_train, clusters, test, 3)

    # ==========Step 5: Save the newly added data==========
    save_newdata(punch_seg)

def save_newdata(punch_seg):
    """Append each newly processed trend as one row of its segmentation file."""
    for i in range(8):
        path = "segmentation_" + str(i) + ".csv"
        trend = np.asarray(punch_seg[i]).ravel()
        # fd.write() cannot serialize an array; append one CSV row via pandas
        pd.DataFrame([trend]).to_csv(path, mode='a', header=False, index=False)
def read_from_file():
    """Load the eight saved segment files back into DataFrames."""
    segmentations = [[] for _ in range(8)]
    for i in range(8):
        segmentations[i] = pd.read_csv("segmentation_" + str(i) + ".csv")

    return segmentations

if __name__ == "__main__":
    main()
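The SegHub.classifier implementation is not part of this diff, but the original imports (KNeighborsClassifier, scipy.stats.mode) suggest a k-nearest-neighbour vote against the clustered training set. A minimal sketch of that idea, on hypothetical toy data, might look like this:

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

# Hypothetical stand-ins: six training sequences with flat-cluster labels 1..3
train = pd.DataFrame(np.random.RandomState(1).rand(6, 10))
labels = np.array([1, 1, 2, 2, 3, 3])

# Fit k-NN on the clustered training set, then label an incoming sequence
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train.values, labels)

new_seq = np.random.RandomState(2).rand(1, 10)
print("predicted cluster:", knn.predict(new_seq))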
4 changes: 2 additions & 2 deletions npfeintool/PFA.py
@@ -3,7 +3,7 @@
The PFA method is used to find the most significant features
"""
from nptdms import TdmsFile
from sklearn import preprocessing
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
@@ -17,7 +17,7 @@ class PFA(object):
    def __init__(self, n_features, q=None):
        self.q = q
        self.n_features = n_features

    def fit(self, X):
        if not self.q:
            self.q = X.shape[1]
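Only formatting changes land in PFA.py here, but for context: principal feature analysis typically projects the data with PCA, clusters the rows of the component matrix with KMeans, and keeps the features nearest each cluster centre. A self-contained sketch of that general technique (an assumption about the idea, not this class's exact truncated logic):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

def pfa_select(X, n_features, q=None):
    """Pick n_features representative columns of X via PCA + KMeans."""
    q = q or X.shape[1]
    pca = PCA(n_components=q).fit(X)
    A_q = pca.components_.T                # one row per original feature
    km = KMeans(n_clusters=n_features, n_init=10).fit(A_q)
    # For each cluster, keep the feature whose row lies closest to the centre
    dists = km.transform(A_q)
    return [int(np.argmin(dists[:, k])) for k in range(n_features)]

X = np.random.RandomState(0).rand(50, 8)
print(pfa_select(X, n_features=3))         # indices of the 3 kept features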
2 changes: 1 addition & 1 deletion npfeintool/__init__.py
@@ -1,6 +1,6 @@
from .filterVar import *
from .segmentation import *

from .segmentation_v2 import SegHub
"""
The __init__.py file is usually empty, but if you remove the __init__.py file, Python will no longer look for submodules inside that directory.
"""
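The segmentation_v2 source itself is not rendered in this view, but the 'l2' and 'rbf' model names passed to SegHub.segment_and_plot in the example scripts match the ruptures change-point library that those scripts originally imported. As an assumption about what the module wraps, a minimal change-point detection sketch with ruptures:

import numpy as np
import ruptures as rpt

# Synthetic piecewise-constant signal with two change points
signal = np.concatenate([np.zeros(50), 3.0 * np.ones(50), np.ones(50)])

# Binary segmentation with an L2 cost, mirroring the scripts' 'l2' mode
algo = rpt.Binseg(model="l2").fit(signal)
print(algo.predict(n_bkps=2))   # breakpoint indices, e.g. [50, 100, 150]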
Binary file added npfeintool/__init__.pyc
Binary file modified npfeintool/__pycache__/__init__.cpython-35.pyc
6 changes: 3 additions & 3 deletions npfeintool/filterVar.py
@@ -17,11 +17,11 @@ def kill_all_x(x, df=None):
return "the data frame is empty."
num_attr = df.columns.size
dropall = []
for i in range(0,num_attr):
for i in range(0, num_attr):
index = df.columns[i]
if(all(df[index]==x)):
if all(df[index] == x):
dropall.append(df.columns[i])
df.drop(dropall, axis = 1, inplace= True)
df.drop(dropall, axis=1, inplace=True)
return df

def keep_x_var(x, df=None):
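A quick usage sketch of kill_all_x, assuming the function is re-exported at package level via the star import in __init__.py: it drops, in place, every column whose values all equal x.

import pandas as pd
from npfeintool import kill_all_x

df = pd.DataFrame({"a": [0, 0, 0], "b": [1, 2, 3]})
# Column "a" is constantly 0, so it gets dropped
print(kill_all_x(0, df).columns.tolist())   # ['b']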
Binary file added npfeintool/filterVar.pyc
8 changes: 4 additions & 4 deletions npfeintool/segmentation.py
@@ -15,7 +15,7 @@ def SW_seg(df_power, width):
     df_seg = []
     size_df = df_power.size
     anchor = 1
-    while((anchor+width*3) < size_df):
+    while ((anchor+width*3) < size_df):
         mid = df_power[anchor+width: anchor+width*3].idxmax()
         seg = df_power[mid-width: mid+width]
         df_seg.append(seg.values)
@@ -37,9 +37,9 @@ def Trend_Detection(s_fr, delta, win_size):
     i = 0
     j = i;

-    while (i<len_s):
-        k = Trend_Change(s_fr[j:i+win_size],delta)
-        if (k != -1):
+    while (i < len_s):
+        k = Trend_Change(s_fr [j : i+win_size], delta)
+        if(k != -1):
             df[j:j+k].plot()
             df[j+k+1:i+win_size].plot()
             j = i+win_size
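SW_seg slides a window along the force signal, centres on a local maximum, and collects fixed-width slices around it. A small synthetic usage sketch, assuming the function is importable from the module as shown (the loop's tail is not rendered in this view):

import numpy as np
import pandas as pd
from npfeintool.segmentation import SW_seg

# Synthetic signal: punch-like peaks repeating every 100 samples
t = np.arange(300)
signal = pd.Series(np.clip(np.sin(2 * np.pi * t / 100), 0, None))

# width=20 -> each extracted segment is 40 samples centred on a peak
segments = SW_seg(signal, width=20)
print(len(segments))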
