update for new module

Senseering · Oct 1, 2018 · a973bee · a973bee
1 parent 9ed8625
commit a973bee
Show file tree

Hide file tree

Showing 3 changed files with 198 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,5 @@ npFeintool.egg-info
 .png
 test_phil.py
 test.csv
+pipeline.ipynb
+test_for_ruptures.ipynb
diff --git a/example/test_for_paper.py b/example/test_for_paper.py
@@ -0,0 +1,79 @@
+from nptdms import TdmsFile
+import numpy as np
+import npfeintool as npF
+from npfeintool import filterVar
+import matplotlib.pyplot as plt
+
+def main():
+    """
+    Example script: read a TDMS recording, extract the punches and search
+    one force channel for trend changes, window by window.
+
+    Side effects visible below: writes 'test.csv' and a series of
+    'plot*.png' files into the current working directory and prints the
+    ranges reported by npF.Trend_Change.
+    """
+    # ============Step 1: Read in data============
+    # load in data, take the TDMS data type as example
+    # (relative path written with a Windows separator)
+    tdms_file = TdmsFile(".\\AKF_SS-FW2-H04521-H05000.tdms")
+    # tdms_file.groups()
+    df_all = tdms_file.object('Untitled').as_dataframe()
+    # NOTE(review): selecting a single column label presumably yields a
+    # pandas Series here; the 2-D indexing further down (`values[0,:]`,
+    # `iloc[:, ...]`) and the second column lookup on df_force look like
+    # they expect a DataFrame -- confirm this script runs as written.
+    df_force = df_all['Stempel_1 (Formula Result)']
+    df_stroke = df_all['Position_Ma']
+
+    # ============Step 2: Extract each punch shape============
+    SEH = npF.SegHub()
+    # Init lists and dfs
+    # (none of these five lists is used again in this function)
+    segmentation_1 = []
+    segmentation_2 = []
+    segmentation_3 = []
+    segmentation_4 = []
+    segmentation_5 = []
+    # Extract all punches of the dataset
+    df_punches = SEH.extract_hub(df_force, df_stroke)
+    #print(df_punches.describe())
+    # (df_punches is not read again below)
+    df_punches = df_punches.reset_index(drop=True)
+    # Step 1: kick out the "all zero", "all one" value
+    # this step is essential for a meaningful selection
+
+    df_temp = npF.kill_all_x(0, df_force)
+    df_temp = npF.keep_x_var(0.8, df_temp)
+    # find out the selected columns: keep the columns of df_force whose
+    # first-row value also occurs in df_temp[0]
+    ix = np.isin(df_force.values[0,:], df_temp[0])
+    column_indices = np.where(ix==True)
+    df_force = df_force.iloc[:, column_indices[0]]
+    '''
+    Step 2: divide the periodical time series data into unit
+    e.g. we select only one channel data to segementation
+    Params are: dataset, windsize(the approx unit size)
+    '''
+    df_punch = df_force['Stempel_1 (Formula Result)']
+    # df_seg = npF.SW_seg(df_punch, 500)
+
+
+    # Hard-coded analysis range [i, len_s) and window parameters for this
+    # particular recording.
+    len_s = 45000# len(s_fr)
+    delta = 400#276
+    win_size = 1000
+    i = 38000
+    j = i
+    # s_fr.drop([len_s, len(s_fr) - 1]).to_csv('test.csv')
+    # Dump the analysed range to CSV for inspection.
+    df_punch.truncate(before=i, after=len_s - 1).to_csv('test.csv')
+    # df['A'].truncate(before=2, after=4)
+
+    # Walk through the range in steps of win_size. j marks the start of the
+    # data not yet reported; it only moves forward after a hit, so the slice
+    # handed to Trend_Change keeps growing until a change is found.
+    while i < len_s:
+        # k is used as an offset relative to j; -1 is handled as "nothing
+        # found" (presumably the helper's no-change marker -- verify against
+        # npF.Trend_Change).
+        k = npF.Trend_Change(df_punch[j:i+win_size], delta)
+        if k != -1:
+            # Part before the reported change point.
+            df_punch[j:j+k].plot()
+            plt.savefig('plot' + str(i) + '.png')
+            plt.clf()
+            # Part after the reported change point.
+            df_punch[j+k+1:i+win_size].plot()
+            plt.savefig('plot' + str(i) + 'x.png')
+            plt.clf()
+            print(str(j) + " + " + str(j + k) + "; " + str(j+k+1) + " to " + str(i + win_size))
+            j = i + win_size
+        i += win_size
+
+    # Saves whatever is on the current figure after the loop (the figure is
+    # cleared after every hit above).
+    plt.savefig('plot.png')
+
+    # npF.Trend_Detection(df_punch[38000:45000], 400, 1000)
+    # choose one unit to detect the trends
+    # npF.Trend_Detection(df_seg[2], 279, 1000)
+    # choose the whole sequences
+    # npF.Trend_Detection(df_power, 279, 1000)
+
+# Run the example only when this file is executed as a script; importing it
+# must not trigger the file reads and plot/CSV writes done by main().
+if __name__ == "__main__":
+    main()
diff --git a/npfeintool/segmentation_v2.py b/npfeintool/segmentation_v2.py
@@ -0,0 +1,117 @@
+from nptdms import TdmsFile
+import matplotlib.pyplot as plt
+import ruptures as rpt
+import pandas as pd
+import seaborn as sns
+from scipy import stats
+import scipy.cluster.hierarchy as hac
+import math
+import numpy as np
+from random import sample
+from scipy.stats import mode
+from sklearn.neighbors import KNeighborsClassifier as KNN
+from scipy.cluster.hierarchy import fcluster
+
+class SegHub(object):
+    """
+    Helpers for cutting a force recording into punches and sub-segments.
+
+    The class keeps no state; every method works only on its arguments.
+    """
+
+    @staticmethod
+    def _concat_columns(frames):
+        """Join single-column frames side by side (empty DataFrame if none)."""
+        # pd.concat raises on an empty list, so keep the "no result" case
+        # an empty DataFrame as callers got before.
+        if not frames:
+            return pd.DataFrame()
+        return pd.concat(frames, axis=1)
+
+    def segment_and_plot(self, df, model='l1', count=0, n_bkps=5):
+        """
+        Split one punch into sub-sequences with ruptures change point
+        detection (``rpt.Dynp``).
+
+        Despite the name nothing is plotted or saved: the plotting code is
+        disabled.
+
+        Params:
+            df: signal of one punch; must expose ``.values`` and support
+                slicing, e.g. a pandas Series.
+            model: cost model handed to ``rpt.Dynp`` (e.g. "l1", "l2", "rbf").
+            count: unused; kept so existing callers keep working.
+            n_bkps: number of change points requested from ruptures.
+
+        Returns:
+            DataFrame with columns 0 .. n_bkps-1, one segment per column,
+            each re-indexed from 0 (columns of unequal length are NaN-padded
+            by the column-wise concat).
+
+        NOTE(review): column j holds the samples between break point j and
+        break point j+1, so the samples before the first break point are
+        not part of the result -- confirm this is intended.
+        """
+        algo = rpt.Dynp(model=model, min_size=15, jump=15).fit(df.values)
+        # Sorted break points; per the ruptures docs the last entry is the
+        # number of samples, which is why index j+1 exists for j < n_bkps.
+        my_bkps_t = algo.predict(n_bkps)
+
+        segments = []
+        for j in range(n_bkps):
+            piece = df[my_bkps_t[j]:my_bkps_t[j + 1]]
+            segments.append(pd.DataFrame({j: piece}).reset_index(drop=True))
+        # One concat at the end instead of re-copying the frame per segment.
+        return SegHub._concat_columns(segments)
+
+    def extract_hub(self, df_force, df_stroke, start=3000, threshold=0.51):
+        """
+        Cut the force signal into punches, using the stroke signal as trigger.
+
+        A punch starts at the first sample whose stroke value is above
+        ``threshold`` and ends at the first later sample whose stroke value
+        is strictly below it. Samples equal to the threshold neither open
+        nor close a punch. A punch that is still open when the data ends is
+        not returned.
+
+        Params:
+            df_force: force signal, sliced as ``df_force[begin:end]``.
+            df_stroke: stroke signal, read as ``df_stroke[i]`` for
+                ``i`` in ``range(start, len(df_stroke))``.
+                (assumes both share a default 0..n-1 index so that label
+                and positional access agree -- TODO confirm)
+            start: first sample index to look at; earlier samples are ignored.
+            threshold: stroke level separating "inside a punch" from idle.
+
+        Returns:
+            DataFrame with one column per punch (labels 0, 1, ...), each
+            re-indexed from 0; empty DataFrame if no punch was completed.
+        """
+        punches = []
+        in_punch = False
+        begin = 0
+        for i in range(start, len(df_stroke)):
+            value = df_stroke[i]
+            if not in_punch:
+                if value > threshold:
+                    in_punch = True
+                    begin = i
+            elif value < threshold:
+                segment = df_force[begin:i].reset_index(drop=True)
+                punches.append(pd.DataFrame({len(punches): segment}))
+                in_punch = False
+        # One concat at the end instead of re-copying the frame per punch.
+        return SegHub._concat_columns(punches)
+
+    @staticmethod
+    def plot_all(df_to_plot):
+        """
+        Draw every column of ``df_to_plot`` onto one axis and save the
+        figure as 'test.png'.
+
+        Declared as a static method because it takes no ``self``; this keeps
+        ``SegHub.plot_all(df)`` working and makes ``SegHub().plot_all(df)``
+        work as well.
+
+        NOTE(review): ``sns.tsplot`` may be unavailable in newer seaborn
+        releases -- check the installed version.
+        """
+        ax = None
+        for index in df_to_plot:
+            ax = sns.tsplot(ax=ax, data=df_to_plot[index].values, err_style="unit_traces")
+        plt.savefig('test.png')
+
+    @staticmethod
+    def separate_subsequence(df_segments, target_segment):
+        """
+        Collect ``df_segments[0] .. df_segments[len(df_segments)-1]`` as the
+        columns of one DataFrame, each re-indexed from 0.
+
+        Declared as a static method because it takes no ``self``. The
+        assembled frame is now returned (it used to be built and dropped).
+
+        Params:
+            df_segments: indexable collection of segments, e.g. a list of
+                pandas Series.
+            target_segment: currently unused.
+                NOTE(review): looks unfinished -- presumably meant to select
+                one sub-sequence; confirm the intended behaviour.
+
+        Returns:
+            DataFrame with columns 0 .. len(df_segments)-1; empty DataFrame
+            for an empty input.
+        """
+        columns = []
+        for j in range(0, len(df_segments)):
+            columns.append(pd.DataFrame({j: df_segments[j]}).reset_index(drop=True))
+        return SegHub._concat_columns(columns)
+
+    def DTWDistance(self, s1, s2):
+        """
+        Return the dynamic time warping distance between ``s1`` and ``s2``.
+
+        The local cost is the squared difference of two samples; the result
+        is the square root of the minimal accumulated cost. Two empty
+        sequences give 0.0, one empty sequence gives ``inf``.
+
+        Params:
+            s1, s2: sequences supporting ``len()`` and access by 0-based
+                integer (lists, arrays, default-indexed Series).
+        """
+        inf = float('inf')
+        n1, n2 = len(s1), len(s2)
+
+        # Only two rows of the cost table are needed at a time. Slot 0 of a
+        # row is the border column (j = -1), slot j+1 holds the cost for j.
+        # Border row (i = -1): cost 0 in the corner, inf everywhere else.
+        prev = [0] + [inf] * n2
+        for i in range(n1):
+            curr = [inf]
+            for j in range(n2):
+                dist = (s1[i] - s2[j]) ** 2
+                # neighbours: (i-1, j), (i, j-1), (i-1, j-1)
+                curr.append(dist + min(prev[j + 1], curr[j], prev[j]))
+            prev = curr
+
+        return math.sqrt(prev[n2])
+
+
+