From a973bee06bac0c232d01fad7c6e820e5cae4655c Mon Sep 17 00:00:00 2001
From: tintinsnowy
Date: Mon, 1 Oct 2018 10:02:47 +0200
Subject: [PATCH] update for new module

---
 .gitignore                    |   2 +
 example/test_for_paper.py     |  85 ++++++++++++++++++++++
 npfeintool/segmentation_v2.py | 130 ++++++++++++++++++++++++++++++++++
 3 files changed, 217 insertions(+)
 create mode 100755 example/test_for_paper.py
 create mode 100755 npfeintool/segmentation_v2.py

diff --git a/.gitignore b/.gitignore
index c8ba561..f5ffadb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,5 @@ npFeintool.egg-info
 .png
 test_phil.py
 test.csv
+pipeline.ipynb
+test_for_ruptures.ipynb
diff --git a/example/test_for_paper.py b/example/test_for_paper.py
new file mode 100755
index 0000000..e885a8a
--- /dev/null
+++ b/example/test_for_paper.py
@@ -0,0 +1,85 @@
+from nptdms import TdmsFile
+import numpy as np
+import npfeintool as npF
+from npfeintool import filterVar
+import matplotlib.pyplot as plt
+
+
+def main():
+    """
+    Example script for testing: loads a TDMS recording, extracts the
+    punch segments and scans one force channel for trend changes.
+    """
+    # ============Step 1: Read in data============
+    # load in data, take the TDMS data type as example
+    tdms_file = TdmsFile(".\\AKF_SS-FW2-H04521-H05000.tdms")
+    # tdms_file.groups()
+    df_all = tdms_file.object('Untitled').as_dataframe()
+    df_force = df_all['Stempel_1 (Formula Result)']
+    df_stroke = df_all['Position_Ma']
+
+    # ============Step 2: Extract each punch shape============
+    SEH = npF.SegHub()
+    # Init lists and dfs
+    segmentation_1 = []
+    segmentation_2 = []
+    segmentation_3 = []
+    segmentation_4 = []
+    segmentation_5 = []
+    # Extract all punches of the dataset
+    df_punches = SEH.extract_hub(df_force, df_stroke)
+    # print(df_punches.describe())
+    df_punches = df_punches.reset_index(drop=True)
+
+    # Step 1: kick out the "all zero", "all one" value
+    # this step is essential for a meaningful selection
+    # NOTE(review): df_force is a single column at this point, while the
+    # 2-D indexing below looks like it expects the whole dataframe - confirm.
+    df_temp = npF.kill_all_x(0, df_force)
+    df_temp = npF.keep_x_var(0.8, df_temp)
+    # find out the selected columns
+    ix = np.isin(df_force.values[0, :], df_temp[0])
+    column_indices = np.where(ix)
+    df_force = df_force.iloc[:, column_indices[0]]
+
+    # Step 2: divide the periodical time series data into units,
+    # e.g. we select only one channel of the data for segmentation.
+    # Params are: dataset, windsize (the approx unit size)
+    df_punch = df_force['Stempel_1 (Formula Result)']
+    # df_seg = npF.SW_seg(df_punch, 500)
+
+    len_s = 45000  # len(s_fr)
+    delta = 400  # 276
+    win_size = 1000
+    i = 38000
+    j = i
+    # s_fr.drop([len_s, len(s_fr) - 1]).to_csv('test.csv')
+    df_punch.truncate(before=i, after=len_s - 1).to_csv('test.csv')
+    # df['A'].truncate(before=2, after=4)
+
+    # j is the start of the current search window and i its moving end;
+    # j is only moved forward once Trend_Change returns something other than -1.
+    while i < len_s:
+        k = npF.Trend_Change(df_punch[j:i+win_size], delta)
+        if k != -1:
+            df_punch[j:j+k].plot()
+            plt.savefig('plot' + str(i) + '.png')
+            plt.clf()
+            df_punch[j+k+1:i+win_size].plot()
+            plt.savefig('plot' + str(i) + 'x.png')
+            plt.clf()
+            print(str(j) + " + " + str(j + k) + "; " + str(j+k+1) + " to " + str(i + win_size))
+            j = i + win_size
+        i += win_size
+
+    plt.savefig('plot.png')
+
+    # npF.Trend_Detection(df_punch[38000:45000], 400, 1000)
+    # choose one unit to detect the trends
+    # npF.Trend_Detection(df_seg[2], 279, 1000)
+    # choose the whole sequences
+    # npF.Trend_Detection(df_power, 279, 1000)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/npfeintool/segmentation_v2.py b/npfeintool/segmentation_v2.py
new file mode 100755
index 0000000..add6e50
--- /dev/null
+++ b/npfeintool/segmentation_v2.py
@@ -0,0 +1,130 @@
+from nptdms import TdmsFile
+import matplotlib.pyplot as plt
+import ruptures as rpt
+import pandas as pd
+import seaborn as sns
+from scipy import stats
+import scipy.cluster.hierarchy as hac
+import math
+import numpy as np
+from random import sample
+from scipy.stats import mode
+from sklearn.neighbors import KNeighborsClassifier as KNN
+from scipy.cluster.hierarchy import fcluster
+
+
+class SegHub(object):
+    '''
+    Helpers for cutting a force signal into punches and sub-segments
+    and for comparing the resulting sequences.
+    '''
+
+    def segment_and_plot(self, df, model='l1', count=0, n_bkps=5):
+        '''
+        receives a punch as input and runs the ruptures change point
+        detection algorithm (Dynp) to split it into subsequences.
+
+        df:     the samples of one punch (presumably a pandas Series)
+        model:  cost model passed to ruptures, e.g. "l1", "l2", "rbf"
+        count:  unused, kept for interface compatibility
+        n_bkps: number of change points to detect
+
+        Returns a dataframe with one column per extracted segment,
+        numbered beginning with 0 and each re-indexed from 0. Despite
+        its name the method does not plot anything.
+
+        NOTE(review): only the slices between consecutive predicted
+        breakpoints are returned, so the samples before the first
+        breakpoint are not part of the result - confirm this is intended.
+        '''
+        data = df.values
+        algo = rpt.Dynp(model=model, min_size=15, jump=15).fit(data)
+        my_bkps_t = algo.predict(n_bkps)
+
+        segments = []
+        for j in range(n_bkps):
+            part = df[my_bkps_t[j]:my_bkps_t[j+1]]
+            segments.append(pd.DataFrame({j: part}).reset_index(drop=True))
+        if not segments:
+            return pd.DataFrame()
+        return pd.concat(segments, axis=1)
+
+    def extract_hub(self, df_force, df_stroke, start=3000, threshold=0.51):
+        '''
+        returns a data frame with one column per punch, cut out of
+        df_force according to the df_stroke time series.
+
+        A punch starts at the first sample (from index start on) where
+        df_stroke exceeds threshold and ends at the next sample where it
+        falls below threshold; that last sample is excluded. A punch
+        that is still open at the end of the data is dropped.
+        '''
+        punches = []
+        in_punch = False
+        begin_temp = 0
+        for i in range(start, len(df_stroke)):
+            value = df_stroke[i]
+            if not in_punch and value > threshold:
+                in_punch = True
+                begin_temp = i
+            elif in_punch and value < threshold:
+                seg_temp = df_force[begin_temp:i].reset_index(drop=True)
+                punches.append(pd.DataFrame({len(punches): seg_temp}))
+                in_punch = False
+        if not punches:
+            return pd.DataFrame()
+        return pd.concat(punches, axis=1)
+
+    @staticmethod
+    def plot_all(df_to_plot):
+        '''
+        plots all columns of the dataframe into the same axes and saves
+        the figure as test.png
+
+        NOTE(review): sns.tsplot is deprecated in newer seaborn
+        releases - confirm the seaborn version this is run with.
+        '''
+        ax = None
+        for index in df_to_plot:
+            ax = sns.tsplot(ax=ax, data=df_to_plot[index].values, err_style="unit_traces")
+        plt.savefig('test.png')
+
+    @staticmethod
+    def separate_subsequence(df_segments, target_segment):
+        '''
+        combines the given segments into one dataframe, one column per
+        segment (numbered beginning with 0, each re-indexed from 0), and
+        returns it.
+
+        target_segment is currently unused.
+        '''
+        frames = []
+        for j in range(len(df_segments)):
+            df_temp = pd.DataFrame({j: df_segments[j]})
+            frames.append(df_temp.reset_index(drop=True))
+        if not frames:
+            return pd.DataFrame()
+        return pd.concat(frames, axis=1)
+
+    def DTWDistance(self, s1, s2):
+        '''
+        returns the dynamic time warping distance between the sequences
+        s1 and s2, i.e. the square root of the cheapest accumulated
+        squared difference over all alignments of the two sequences.
+        '''
+        DTW = {}
+
+        # border cells: aligning against nothing costs infinity, except
+        # for the start cell where both sequences are still empty
+        for i in range(len(s1)):
+            DTW[(i, -1)] = float('inf')
+        for i in range(len(s2)):
+            DTW[(-1, i)] = float('inf')
+        DTW[(-1, -1)] = 0
+
+        for i in range(len(s1)):
+            for j in range(len(s2)):
+                dist = (s1[i]-s2[j])**2
+                DTW[(i, j)] = dist + min(DTW[(i-1, j)], DTW[(i, j-1)], DTW[(i-1, j-1)])
+
+        return math.sqrt(DTW[len(s1)-1, len(s2)-1])