From fab010da2882ea326b16095b93a30130977981af Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 27 Nov 2023 23:17:23 +0100 Subject: [PATCH 01/33] Added symbolization --- dtaidistance/symbolization/__init__.py | 0 dtaidistance/symbolization/alignment.py | 92 +++++++++++++++++++++++++ dtaidistance/util.py | 1 + tests/rsrc/trace_motifs.py | 25 +++++++ tests/test_symbolization.py | 87 +++++++++++++++++++++++ 5 files changed, 205 insertions(+) create mode 100644 dtaidistance/symbolization/__init__.py create mode 100644 dtaidistance/symbolization/alignment.py create mode 100644 tests/rsrc/trace_motifs.py create mode 100644 tests/test_symbolization.py diff --git a/dtaidistance/symbolization/__init__.py b/dtaidistance/symbolization/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py new file mode 100644 index 00000000..3a995455 --- /dev/null +++ b/dtaidistance/symbolization/alignment.py @@ -0,0 +1,92 @@ +import numpy as np + +from ..util import SeriesContainer +from ..subsequence.subsequencealignment import subsequence_alignment +from ..exceptions import MatplotlibException + + +class SymbolAlignment: + def __init__(self, codebook): + """Translate a time series with continuous values to a list of discrete + symbols based on motifs in a codebook. + + :param codebook: List of motifs. + """ + self.codebook = codebook + self.use_c = False + self.symbols = None + + def align(self, series): + """Perform alignment. + + :param series: List of time series or a numpy array + """ + sc = SeriesContainer(series) + + patterns = np.zeros((len(sc), sc.get_max_length(), len(self.codebook) + 1)) + patterns[:, :, :] = np.inf + for sidx in range(len(sc)): + for midx in range(len(self.codebook)): + medoidd = np.array(self.codebook[midx]) + sa = subsequence_alignment(medoidd, sc[sidx]) + sa.use_c = self.use_c + for match in sa.kbest_matches(k=None): + patterns[sidx, match.segment[0]:match.segment[1], midx] = match.value + patterns[:, :, len(self.codebook)] = 0 + patterns[:, :, len(self.codebook)] = np.max(patterns) + 1 + best_patterns = np.argmin(patterns, axis=2).astype(int) + self.symbols = best_patterns + return best_patterns + + def align_fast(self, series): + use_c = self.use_c + self.use_c = True + result = self.align(series) + self.use_c = use_c + return result + + def hangover(self, symbols, threshold=4): + """Hangover filter for symbols.""" + sequences = [] + sequences_idx = [] + for r in range(symbols.shape[0]): + sequence = [] + sequence_idx = [] + lastval = None + lastcnt = 0 + firstidx = None + lastsaved = None + for c, v in enumerate(symbols[r, :]): + if v != lastval: + if lastcnt > threshold and lastval != lastsaved: + sequence.append(lastval + 1) # cannot be zero + sequence_idx.append((firstidx, c)) + lastsaved = lastval + lastval = v + lastcnt = 0 + firstidx = c + else: + lastcnt += 1 + sequences.append(sequence) + sequences_idx.append(sequence_idx) + return sequences, sequences_idx + + def plot(self, series, sequences, sequences_idx, labels=None, filename=None): + try: + import matplotlib.pyplot as plt + from matplotlib import gridspec + except ImportError: + raise MatplotlibException("No matplotlib available") + sc = SeriesContainer(series) + fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=(12, 8)) + for r in range(series.shape[0]): + axs[r].plot(series[r, :]) + if labels is not None: + axs[r].set_ylabel(f"L={labels[r]}") + for symbol, (fidx, lidx) in zip(sequences[r], sequences_idx[r]): + axs[r].text(fidx, 0, str(symbol)) + if filename is not None: + fig.savefig(filename) + plt.close(fig) + else: + return fig, axs diff --git a/dtaidistance/util.py b/dtaidistance/util.py index 9588fad7..dcae7cd5 100644 --- a/dtaidistance/util.py +++ b/dtaidistance/util.py @@ -182,6 +182,7 @@ def detect_ndim(s): return 0 return None + class SeriesContainer: def __init__(self, series, support_ndim=True): """Container for a list of series. diff --git a/tests/rsrc/trace_motifs.py b/tests/rsrc/trace_motifs.py new file mode 100644 index 00000000..79b7e9b4 --- /dev/null +++ b/tests/rsrc/trace_motifs.py @@ -0,0 +1,25 @@ +{"medoidd": [ +[0.009738326537240294, 0.011964652106953237, 0.010622285847292336, 0.006757389104024892, 0.0023183732632415816, -0.0016447205656971386, -0.004190257258034718, -0.004219463480007841, -0.0026165371465944583, -0.0019341072403156801, -0.002716419198005369, -0.0029963950825212414, -0.001376726431888751, 0.001236942460339333, 0.0028858629435277857, 0.0026377171697628773, 0.0005275283163810499, -0.0028965785617120934, -0.005712986745093676, -0.006054655003452295, -0.004351062159881046, -0.0018933179604214887, 0.000729286847484617, 0.002709335954796943, 0.002777695391245453, 0.0008928276714737274, -0.000626316464697428, 1.9329081864709135e-05, 0.0012525481814324455, 0.0011303745226398355, 0.0003484993533922028, 0.00035736616584081435, 0.0014560907380688536, 0.003103529496780638, 0.004388480863955921, 0.004488912529141967, 0.003192434762938183, 0.0008631602742261619, -0.0017723766579757082, -0.004135397640431513, -0.005674559837338596, -0.00505402950231339, -0.002034223246675937, 0.0007283173413888254, 0.0008673986955736561, -0.0005470301710505041, -0.0008629225635741261, 0.0010805589791988198, 0.004600934520062411, 0.00813133332288657, 0.010181998041574374, 0.010468841307302856, 0.009448599888936784, 0.006878375499617246, 0.00297825702279434, 0.0001785912756702197, 0.000736952816741393, 0.003648783778532591, 0.005545196972473927, 0.003719925920095755, -0.0011786196934802113, -0.005334061435820166, -0.005297259703796474, -0.0004887333309842194, 0.007665979162742368], +[0.006270817842656334, 0.004897354208389141, 0.00037858874958891244, -0.0023801627426085687, 0.0010160720717771908, 0.010541678820179482, 0.023232531296182215, 0.007690077587110239, 0.006802228295572502, 0.006271477606357726, 0.006058306653867977, 0.005420780075314209, 0.003361491673163077, 0.00015141696235257288, -0.0023997358292757367, -0.0027394189360561384, -0.0013171917599329283, 0.0002961588399015181, 0.0010960707723143084, 0.0007299790965057298, -0.00028305071073562587, -0.0010423887904395547, -0.0016513444099927124, -0.002208980196654907, -0.0021252801284368144, -0.0016308806081128126, -0.0011680816825771208, -0.0005996683346982936, -0.00045863495292597855, -0.001355917514670226, -0.0028622979777913104, -0.004207055884407328, -0.00482858264292102, -0.004605796073955368, -0.003853967515424759, -0.0025930508474843444, -0.0003954981097867117, 0.0028361005830696826, 0.006312918421142607, 0.008640829437122317, 0.009014477057184929, 0.007993594877976618, 0.006434091571258044, 0.004041084533775861, 0.0001930334111030506, -0.004190807339089397, -0.0075356873236916404, -0.009107995095017576, -0.008266428013016745, -0.005107391488292967], +[-0.002681499422080137, -0.0024117043965075895, -0.0017065615812844007, -0.0019008535941043717, -0.003200293069219901, -0.0042996161973646845, -0.003148500247459164, 0.0008766412489297546, 0.005743935212368062, 0.00809941991402081, 0.006291905843884986, 0.002159876410444946, -0.0013536520442112394, -0.0030429798493856785, -0.0025540719574827725, 0.00034787998275495987, 0.004542429047787261, 0.007830719026473646, 0.00895046375410627, 0.008576544437107796, 0.008014701349437332, 0.008116320674321545, 0.009420245649951, 0.011390085672354215, 0.013086659097448943, 0.015158869631013708, 0.01870373328418389, 0.023470739535650292, 0.028863308608869407, 0.03521150540361835, 0.04266621915922581, 0.049988333246829456, 0.056476456563709816, 0.06329226746570606, 0.07120930522129196, 0.07949789094881139, 0.08732998119887687, 0.09469281019216148, 0.10199885523501928, 0.10875157328975052, 0.11332774775068534, 0.11466036053436131, 0.1128468902087235, 0.10873503763404527, 0.10411307714829035, 0.10049589897151319, 0.09753849040033948, 0.0937521210787526, 0.08769188398647072, 0.07876309113970044, 0.06787498114844222, 0.056813184547542875, 0.04710989185935388, 0.039519316243316095, 0.03385974907872909, 0.029379501006495678, 0.025292341650783066, 0.020838377257869863, 0.015836402644680985, 0.011377534341669862, 0.008198711506334985], +[0.019638089047306892, 0.022960098283461242, 0.0266987278540745, 0.029310683620425198, 0.02978234653913065, 0.02782829989073779, 0.02413033944407573, 0.021124904118428416, 0.02187705290898641, 0.026556800687623793, 0.031566848430746974, 0.03387491277961122, 0.034592240566451354, 0.036555754790603044, 0.039479946843599546, 0.04047879484809621, 0.03910449295423753, 0.03764128091693777, 0.03709495980042607, 0.03707558017902045, 0.037638864643341235, 0.038396279263272576, 0.03801461009107701, 0.03534623400578789, 0.030710654861913027, 0.026782128156084294, 0.02673233449991051, 0.030618977087820128, 0.03565115065827135, 0.03983123996534304, 0.04266992002963381, 0.0437620563483251, 0.04299186600703205, 0.04054339814681518, 0.036191288005048325, 0.030002313353507643, 0.0238270627313579, 0.020754364764397973, 0.022092424907163278, 0.026237650858748945, 0.03117833931584861, 0.035427853469443334, 0.03672350032071756, 0.03346364974739277, 0.026650194650179815, 0.018818824137463636, 0.012919892795613081, 0.011430419060840667, 0.014490811742497953, 0.01957469121659563, 0.024075876826009496, 0.026927140497047482], +[-0.004287253944047985, -0.00998940531493957, -0.011857964987516324, -0.008779183225929176, -0.0026364266627539137, 0.0037727023568159546, 0.008106236601059027, 0.008638136341663556, 0.005460818619820337, 0.0007974025428244316, -0.003008364861786507, -0.0044627841235969365, -0.0027238396495833676, 0.0013689943181857313, 0.005311444285229046, 0.007092624456906419, 0.006169160884295716, 0.0030336150267400346, -0.0009329313065168424, -0.004100751736863347, -0.005568077189751667, -0.005262883343173421, -0.0036447996069310404, -0.0016271850949530673, -0.00031533754048946233, -0.0003083320252870954, -0.0009481992313219693, -0.0008302040898082009, 0.0002653335745451387, 0.0009231498016950581, 0.00010665963812263235, -0.0013180083835298462, -0.0014253488745918884, 0.0002887968415663256, 0.0018660053245024733, 0.0015216497840771425], +[-0.0028134954581731405, 0.002818067556625666, 0.008465797137608711, 0.01041163174444901, 0.008191553063858228, 0.004998935595377407, 0.0035394432383308045, 0.0024395812214368824, -0.0005295850581698381, -0.004291455806094737, -0.006353145390577032, -0.0063997215018752876, -0.005679399387115195, -0.004372984493769128, -0.0023228778613790793, -0.0006137258239916762, -0.00011358600226176786, -0.0003897195837385862, -3.0508650738133517e-05, 0.002558031329144365, 0.007266761668714037, 0.012370885969396245, 0.01709458635364734, 0.022163271077401684, 0.027705055800415993, 0.030981393816246584, 0.02497631862366227, -0.0013363597496144465, -0.06087209542912369, -0.16314492763685537, -0.30323656548437816, -0.4496864828340082, -0.5465914261157763, -0.5454095023093279, -0.4467459563006378, -0.3003772555278551, -0.16218405733599833, -0.061867851331927785, -0.0029751911002914213, 0.024283157437625345, 0.032546240713844536, 0.03166010692749919, 0.02695112096438938, 0.02081250942021684, 0.014907091728462489, 0.00944939662023517, 0.0034640516957625683, -0.0013819536702207973, -0.0014730729244202211, 0.003744606142039035, 0.01150051491818315], +[-0.005637183764428881, -0.0005425615124055582, 0.00467598681440966, 0.006941653324953008, 0.005504125526229633, 0.0018529813136035648, -0.0022396118155235227, -0.005885581135736346, -0.008501593748712958, -0.009007185818746849, -0.0060557918362190745, 0.0011867152314741648, 0.013328654785708114, 0.031094690013449885, 0.05322210700708025, 0.07438534099154805, 0.08734357483114687, 0.08748337976349847, 0.07466960001480048, 0.05137055965771665, 0.02053317231477691, -0.014467690724672917, -0.04928497643841687, -0.0793111260853394, -0.10147108171806891, -0.1152891512488047, -0.12194231911565526, -0.12233177509498788, -0.11547026205716944, -0.09862845793696035, -0.0702696960292381, -0.03277878013013172, 0.007990844543684896, 0.044853026357608274, 0.07152049593127682, 0.08431687934902682, 0.08263815176193566, 0.06906647247758037, 0.049160480617405036, 0.029253384173005607, 0.013209594324337843, 0.0019660403347358814, -0.004077347165462647, -0.004426127468342706, -0.0006709682006686829, 0.002766652181406833, 0.0025474977364212913, 0.00018215252005965115, -0.0008025029929843114, 0.0005559589444060119, 0.0023005756179447223, 0.0016303244207306231, -0.0018628487828121795, -0.0050539099237965545, -0.004758757432020269, -0.0006838653589661196, 0.004680796284491798, 0.0077003535603889705, 0.0061310955932926455, 0.00045549138651656293, -0.006785758882060161], +[0.008297140728395474, 0.0077829433321384344, 0.004687346718570514, 0.0007329181023744144, -0.002114917405532092, -0.003448924355469772, -0.0040892297461128544, -0.004018560788588368, -0.001867856187316919, 0.0022826828732570477, 0.005319466070316521, 0.003721145202993613, -0.0033030480901815817, -0.014082079269281306, -0.025676282646198475, -0.03219528900260038, -0.023147203765532656, 0.015420476174373617, 0.09737316575438129, 0.22890194633293265, 0.39658123160733666, 0.5529010577453783, 0.6145196880142717, 0.4999104721596957, 0.18828458107652374, -0.25813697713523653, -0.7145423914161589, -1.030628654430789, -1.099372013772695, -0.9312964732792466, -0.6414052909054304, -0.3548209133742218, -0.14035556323903312, -0.011609387771321696, 0.04724982129543476, 0.06155024252430186, 0.05378234085389313, 0.03837809090895361, 0.023034329409584022, 0.011260962192221573, 0.004430232851783308, 0.0023938235765319036, 0.0028234317404478315, 0.00361686997155872, 0.005090788674395571, 0.007814426362782699, 0.01097272493839408, 0.013406375411413185, 0.014253641336574635, 0.012702819356992308, 0.009022913485456628, 0.005986635379246916, 0.006628237772934327, 0.010146792669749844, 0.01305233582388616], +[-0.008289981559741252, -0.003298433478538587, 0.0006011075075317449, 0.002113744272287928, 0.003135243113088281, 0.006340861567124984, 0.01112683304550924, 0.014149164837280308, 0.013262970586412605, 0.008757782270557686, 0.0015003636614788001, -0.0066258097875516965, -0.011387520278202407, -0.009872281509779824, -0.00396080143902116, 0.0020645609511286134, 0.005304857211823105, 0.005353017156831702, 0.003054685018006191, -0.00037959719872097584, -0.002925222963897787, -0.0027728444978279104, -0.00040126994635767256, 0.0022703960437533022, 0.004120015361423814, 0.004874531711162307, 0.004337925759834736, 0.0028441206406502446, 0.0022388247143080514, 0.0037990828641406016, 0.00487426142230123, 0.0019013381775676791, -0.0038760942572835253, -0.007663492817699112, -0.0065740473449975535, -0.0013788929408840946, 0.005280709726334078, 0.010027091943077101, 0.009595359628257449, 0.0036311427104917665, -0.004247940977554462, -0.009649905790135738, -0.010027282259316157, -0.005531820321715642, 0.00034102643555090494, 0.003685594155213082, 0.0045731341312024625, 0.005320556854761252, 0.005866265273261798, 0.0039684891508613715, -0.0009974513223597273], +[0.006887267992163895, 0.00392011134409433, 0.00040503481678873514, -0.0019629276312794043, -0.002596796640918944, -0.001680209119645267, 0.00042938089892913635, 0.003215724868680723, 0.005324548156030924, 0.005225578848272202, 0.0031070312969869564, 0.0003352915093196215, -0.0026274004911509153, -0.005795414121678272, -0.00787982086080176, -0.00687683031619244, -0.0023271832312106917, 0.003872737073070975, 0.008760901494536054, 0.010082186054751236, 0.007260161690492175, 0.001747862029934226, -0.003248216761241405, -0.004661450092216911, -0.0020802407736212024, 0.0022225702215137415, 0.005335918558848557, 0.005007541293426102, 0.0008158241688040525, -0.004602877817528118, -0.0070364634171218654, -0.004395953840989917, 0.0012249817698188495, 0.005584596712497001, 0.006623303568554763, 0.0057348818977645925, 0.004934538246857169, 0.00458778668368428, 0.004083361286345073, 0.0033480893005954243, 0.0025460600925180125, 0.001240881605726499, -0.0007646261622779149, -0.00306833243615988, -0.005697450903120099, -0.009209383000139513, -0.014225122536912852, -0.020270023795538418, -0.02364515974425013, -0.017538852951044796, 0.00757141383586031], +[-0.020912806580237586, -0.0025641181343891463, 0.005562608882155661, 0.005978369178343428, 0.003376555220721077, 0.0011509998858137156, 0.0008052099641628226, 0.0017663915557843489, 0.0016562075860186648, -0.0008940572498723937, -0.004241794055713198, -0.0059419920299924826, -0.005286927009570219, -0.0026272510720726843, 0.0015913382246666523, 0.006018553817896848, 0.008345535907443769, 0.007085611522267054, 0.0027494783014837764, -0.0024224666590463694, -0.00528857278247747, -0.004059679630566864, -0.0005869903059323128, 0.0017225989643580076, 0.0021068640185552996, 0.0018659066039036537, 0.001533266492704893, 0.0014441085191057015, 0.0022289654344537562, 0.0033819435087403373, 0.0036072455473456504, 0.0023942137373668656, 0.0006100492690199963, 1.5573554607759682e-05, 0.0017203732957885873, 0.005294289965408637, 0.009781691119293907, 0.014276629246363307, 0.01774435416158842, 0.0202694313323587, 0.02335157934994808, 0.027754481009611677, 0.032967114899024534, 0.03890279678730579, 0.04657237755678889, 0.056695432256006005, 0.06789080757919845, 0.07734540429594018, 0.08333650264184386, 0.08571584799436058] +],"medoid": [ +[0.84438188, 0.77838186, 0.65133807, 0.77459291, 0.79400462, 0.705399, 0.75068893, 0.80349276, 0.80113573, 0.80269437, 0.78572642, 0.82289457, 0.80321875, 0.72450701, 0.79889292, 0.80933816, 0.78873924, 0.7747986, 0.74685353, 0.77255592, 0.80416358, 0.75443181, 0.80760421, 0.81502863, 0.75615011, 0.73772501, 0.80187316, 0.7252401, 0.77954855, 0.73518276, 0.7938198, 0.82429072, 0.71670352, 0.74409082, 0.80054044, 0.78212454, 0.76370865, 0.77596236, 0.76210875, 0.77428764, 0.77220979, 0.80545471, 0.76612732, 0.83304828, 0.74612291, 0.81431912, 0.80606316, 0.72228485, 0.74810564, 0.78790931, 0.80887416, 0.76908482, 0.76239796, 0.76896965, 0.75803653, 0.77854661, 0.82319608, 0.78634199, 0.8107643, 0.82866458, 0.89407705, 0.7641228, 0.83622315, 0.78177368, 0.83749782], +[0.54060152, 0.53820263, 0.58579237, 0.54910136, 0.57988204, 0.57966234, 0.49633727, 0.55481258, 0.52730963, 0.49980665, 0.52698193, 0.53934842, 0.55171494, 0.52500144, 0.55546648, 0.52442913, 0.485412, 0.55053202, 0.59755011, 0.60642837, 0.52293427, 0.52428721, 0.54742457, 0.57056197, -1.2781808, -1.270717, -1.2754118, -1.2440114, -1.296536, -1.2243645, -1.2484483, -1.2160164, -1.2532291, -1.2904419, -1.2262962, -1.274638, -1.2482585, -1.2257146, -1.2576611, -1.2811945, -1.205515, -1.2768804, -1.2829125, -1.21444, -1.2922766, -1.2700439, -1.2384597, -1.2754035, -1.2495082, -1.275582], +[-1.764959, -1.758428, -1.755384, -1.8048089, -1.7427984, -1.7524461, -1.7743527, -1.7622733, -1.6832404, -1.7584536, -1.7097339, -1.776999, -1.7424336, -1.7463811, -1.7500579, -1.7147319, -1.7549275, -1.7694574, -1.7880172, -1.7694329, -1.7332743, -1.6743591, -1.7698743, -1.7222443, -1.7295982, -1.747146, -1.7699144, -1.7362798, -1.7385906, -1.6721379, -1.7521683, -1.6666638, -1.6974977, -1.7283316, -1.6400473, -1.6508405, -1.6590755, -1.6306346, -1.608026, -1.5477316, -1.5654866, -1.5219054, -1.4073534, -1.3859411, -1.3533105, -1.2430636, -1.1949239, -1.0675013, -0.99737071, -0.91313013, -0.79654296, -0.65093837, -0.56690075, -0.43432784, -0.29039244, -0.27083328, -0.11613362, -0.055487159, 0.061009917, 0.15074492, 0.25451297], +[-1.5542495, -1.5330937, -1.5585277, -1.5113529, -1.5060341, -1.4782694, -1.5001215, -1.4172555, -1.4245774, -1.366623, -1.346209, -1.2843111, -1.2883973, -1.3343954, -1.2884746, -1.2020445, -1.1564009, -1.1358806, -1.1976362, -1.062566, -1.0033826, -0.98656063, -1.0109437, -0.87716813, -0.90947203, -0.86403268, -0.80511885, -0.79989802, -0.70165297, -0.68401296, -0.66041206, -0.71670277, -0.62491077, -0.56559891, -0.58100018, -0.48570767, -0.46930309, -0.40938651, -0.40075362, -0.31839444, -0.2957306, -0.24879375, -0.31811217, -0.25096126, -0.21087197, -0.20488602, -0.19484992, -0.075498928, -0.073965346, -0.055008802, 0.018661099, -0.059364282], +[0.60497311, 0.64637229, 0.67595275, 0.65099111, 0.60312627, 0.57634147, 0.59192132, 0.53136129, 0.60233072, 0.61163535, 0.56334068, 0.63242396, 0.60408228, 0.59699419, 0.58054885, 0.58267229, 0.60439374, 0.60656703, 0.61371991, 0.63322789, 0.65618444, 0.60716864, 0.57640042, 0.5488937, 0.59458537, 0.55005185, 0.596115, 0.62343982, 0.63446363, 0.58853702, 0.63716601, 0.58929181, 0.5732526, 0.57766061, 0.62334847, 0.59164165], +[0.7357068, 0.77987384, 0.74622201, 0.74054013, 0.69766764, 0.73834508, 0.71615239, 0.76901016, 0.73525696, 0.65103513, 0.71464667, 0.69380954, 0.78088556, 0.78842875, 0.7205993, 0.69822978, 0.80121693, 0.76916153, 0.7607292, 0.71088422, 0.73344384, 0.78071967, 0.66464739, 0.74940689, 0.71842323, 0.7305697, 0.74343837, 0.73903769, 0.67721796, 0.75011665, 0.74548968, 0.78380898, 0.75960263, 0.79420748, 0.75070558, 0.81269416, 0.76917626, 0.77648279, 0.82532642, 0.69166605, 0.44261591, -0.64707618, -1.7746657, -2.0157572, -2.0856148, -2.0796597, -2.0888094, -2.0925688, -2.088029, -2.0951268, -2.0866033], +[0.69750851, 0.65881802, 0.64291074, 0.7088848, 0.74402446, 0.70394086, 0.69329196, 0.68948537, 0.73774147, 0.75059863, 0.72782298, 0.70504731, 0.66748104, 0.68074236, 0.69400369, 0.73812531, 0.7288474, 0.71956948, 0.7185948, 0.71706089, 0.71552699, 0.70321344, 0.67378033, 0.71011538, 0.7091126, 0.69389718, 0.72282214, 0.82990963, 0.95996032, 1.0596339, 1.1245853, 1.1717446, 1.2189038, 1.1882791, 1.1576547, 1.0217234, 0.93527351, 0.8079167, 0.7036576, 0.60427898, 0.49358025, 0.33578195, 0.25705993, 0.21923055, 0.21883435, 0.30094606, 0.38305775, 0.47430962, 0.60066829, 0.65948806, 0.7160349, 0.66490137, 0.73110295, 0.72146614, 0.71182933, 0.6761567, 0.64016918, 0.75153756, 0.7592232, 0.69488757, 0.70314184], +[0.61966872, 0.59997056, 0.59309788, 0.59097654, 0.66743532, 0.6258567, 0.6428329, 0.61084533, 0.60601246, 0.65786714, 0.61675659, 0.58428597, 0.57911015, 0.61814446, 0.6546757, 0.62327132, 0.61820943, 0.67392847, 0.60636712, 0.64118791, 0.6128218, 0.5897886, 0.67797797, 0.71699196, 1.0117528, 2.2726181, 3.469821, 3.8304702, 3.8196334, 3.2619192, 1.0205306, -1.2214097, -1.7390308, -1.867537, -1.8828808, -1.9184427, -1.8976587, -1.838841, -1.924583, -1.8400097, -1.8427256, -1.8667457, -1.7722318, -1.8766957, -1.8453581, -1.7864935, -1.808093, -1.8282433, -1.7986122, -1.8028363, -1.7642486, -1.7948663, -1.7103064, -1.7080497, -1.7272006], +[0.67341011, 0.70143316, 0.72679335, 0.61423037, 0.68534071, 0.68526799, 0.60639479, 0.62336117, 0.60596388, 0.63999913, 0.69361348, 0.60755117, 0.61197791, 0.63231148, 0.68055122, 0.70665096, 0.64302471, 0.76326157, 0.72177707, 0.62492677, 0.64499797, 0.62468628, 0.67372392, 0.67905212, 0.66150807, 0.69686478, 0.69033216, 0.68281078, 0.60971636, 0.70519194, 0.64119668, 0.6925056, 0.6692134, 0.67918246, 0.75355963, 0.6369969, 0.64142484, 0.71243301, 0.767838, 0.70068599, 0.67853586, 0.65825095, 0.67003952, 0.65834781, 0.66051836, 0.77453371, 0.72348253, 0.71621468, 0.70639226, 0.66399129, 0.61357363], +[0.61601875, 0.55875039, 0.61920101, 0.58680507, 0.65630765, 0.62532182, 0.62333677, 0.62135171, 0.61077624, 0.61646906, 0.62216187, 0.57777527, 0.64522626, 0.64334256, 0.64145885, 0.6092165, 0.65203358, 0.64423629, 0.62393206, 0.59370222, 0.58331912, 0.59988164, 0.61644417, 0.63818231, 0.65992046, 0.6720105, 0.64342324, 0.59594752, 0.62309925, 0.61544211, 0.6242389, 0.65967616, 0.69254304, 0.64755901, 0.62593917, 0.57304026, 0.60997802, 0.66971546, 0.64983222, 0.65216884, 0.63316681, 0.65775112, 0.67792936, 0.64434178, 0.6632992, 0.68225661, 0.66526794, 0.64585782, 0.67707294, 0.61787448, 0.65801584], +[0.78851334, 0.7840903, 0.83304869, 0.7731245, 0.83172647, 0.77906608, 0.82847427, 0.79732765, 0.79621608, 0.79100463, 0.80762596, 0.79441029, 0.79587389, 0.8021834, 0.80804171, -2.0450118, -2.0916771, -2.0776367, -2.0335167, -1.990173, -2.0577092, -2.0128429, -2.0509702, -2.0553157, -2.0760096, -1.9999803, -2.0131392, -2.0493882, -2.0567518, -2.0454945, -2.0604343, -2.0796921, -2.062318, -2.0275983, -2.0246658, -2.0089151, -2.000085, -2.0572912, -2.0903948, -2.0479302, -1.9873114, -2.073722, -2.0292912, -2.0143261, -2.0446235, -2.0512608, -2.0202187, -2.0313915, -1.9981877, -2.0102867] +]} diff --git a/tests/test_symbolization.py b/tests/test_symbolization.py new file mode 100644 index 00000000..041d0a25 --- /dev/null +++ b/tests/test_symbolization.py @@ -0,0 +1,87 @@ +import os +import ast +from pathlib import Path + +import pytest + +from dtaidistance import util_numpy +from dtaidistance.symbolization.alignment import SymbolAlignment +from dtaidistance.preprocessing import differencing +from dtaidistance import dtw_visualisation as dtwvis +from dtaidistance.exceptions import MatplotlibException + + +# If environment variable TESTDIR is set, save figures to this +# directory, otherwise do not save figures +directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) + +numpyonly = pytest.mark.skipif("util_numpy.test_without_numpy()") + + +@numpyonly +def test_trace1(): + print(f"Using directory: {directory}") + with util_numpy.test_uses_numpy() as np: + # Load data + rsrc = Path(__file__).parent / 'rsrc' + rsrc_fn = rsrc / 'Trace_TRAIN.txt' + data = np.loadtxt(rsrc_fn) + labels = data[:, 0] + series = data[:, 1:] + + # Filter data (to speed up test) + nb_occurrences = 3 # occurrence of each class + data2 = np.zeros((4 * nb_occurrences, data.shape[1])) + cnts = [0] * (4 + 1) + for r in range(data.shape[0]): + label = int(data[r, 0]) + if cnts[label] < nb_occurrences: + data2[cnts[label] + (label - 1) * nb_occurrences, :] = data[r, :] + cnts[label] += 1 + data = data2 + print(f"Data: {data.shape}") + data = data[np.argsort(data[:, 0])] + labels = data[:, 0] + series = data[:, 1:] + + # Load motifs (learned with LoCoMotif) + with (rsrc / 'trace_motifs.py').open('r') as fp: + data = fp.read() + array = np.array + data = ast.literal_eval(data) + medoidsd = data["medoidd"] + medoids = data["medoid"] + + # Plot motifs + if directory is not None and not dtwvis.test_without_visualization(): + try: + import matplotlib.pyplot as plt + from matplotlib import gridspec + except ImportError: + raise MatplotlibException("No matplotlib available") + + fig, axs = plt.subplots(nrows=len(medoids), ncols=2, sharex=True, sharey="col", figsize=(6, 8)) + fig.subplots_adjust(wspace=0.4, hspace=1) + for idx, (medoid, medoidd) in enumerate(zip(medoids, medoidsd)): + axs[idx, 0].plot(medoid) + axs[idx, 0].set_title(f"Medoid {idx + 1}") + axs[idx, 1].plot(medoidd) + fig.savefig(directory / "medoids.png") + plt.close(fig) + + # Preprocess time series + seriesd = differencing(series, smooth=0.1) + + # Symbolization + sa = SymbolAlignment(codebook=medoidsd) + symbols = sa.align_fast(seriesd) + if directory is not None: + np.savetxt(str(directory / "symbolized.npy"), symbols, fmt='%i') + + # Hangover + sequences, sequences_idx = sa.hangover(symbols, threshold=4) + if directory is not None and not dtwvis.test_without_visualization(): + sa.plot(series, sequences, sequences_idx, + labels=labels, filename=directory / "series.png") + sa.plot(seriesd, sequences, sequences_idx, + labels=labels, filename=directory / "seriesd.png") From b0bbf00131d139b7230aa1a597be8c314e14d2a6 Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 30 Nov 2023 15:40:46 +0100 Subject: [PATCH 02/33] symbolization --- dtaidistance/symbolization/alignment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index 3a995455..a2010a88 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -28,8 +28,7 @@ def align(self, series): for sidx in range(len(sc)): for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) - sa = subsequence_alignment(medoidd, sc[sidx]) - sa.use_c = self.use_c + sa = subsequence_alignment(medoidd, sc[sidx], use_c=self.use_c) for match in sa.kbest_matches(k=None): patterns[sidx, match.segment[0]:match.segment[1], midx] = match.value patterns[:, :, len(self.codebook)] = 0 From 32c17b6abf0ae11b8661f5417261040174e760b9 Mon Sep 17 00:00:00 2001 From: Aras Yurtman <47949469+aras-y@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:30:07 +0100 Subject: [PATCH 03/33] correct the default cost for multivariate time series in the dosctring --- dtaidistance/dtw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 198ccfc7..03a20349 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -414,7 +414,7 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, :param psi_neg: Replace values that should be skipped because of psi-relaxation with -1. :param use_c: Use the C implementation instead of Python :param use_ndim: The input series is >1 dimensions. - Use cost = EuclideanDistance(s1[i], s2[j]) + Use cost = SquaredEuclideanDistance(s1[i], s2[j]) :param inner_dist: Distance between two points in the time series. One of 'squared euclidean' (default), 'euclidean' :returns: (DTW distance, DTW matrix) From a017d4f5a48f18bc55ac89b21cf95be713649960 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 31 Jan 2024 14:46:47 +0100 Subject: [PATCH 04/33] Documentation --- docs/index.rst | 1 + docs/usage/advanced.rst | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 docs/usage/advanced.rst diff --git a/docs/index.rst b/docs/index.rst index daed7de0..8ffb01b4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,7 @@ Source available on https://github.com/wannesm/dtaidistance. usage/subsequence usage/sequence usage/similarity + usage/advanced usage/changelist diff --git a/docs/usage/advanced.rst b/docs/usage/advanced.rst new file mode 100644 index 00000000..8b9edf12 --- /dev/null +++ b/docs/usage/advanced.rst @@ -0,0 +1,20 @@ +Advanced features +----------------- + +Use float instead of double +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fast version is based on C, which defaults to the double +datatype. It is possible to compile the C code to use float or +integers if that is required (requires a C compiler and make): + +:: + + git clone https://github.com/wannesm/dtaidistance.git + cd dtaidistance/dtaidistance/jinja + make float + # make int + cd ../.. + make build + pip install . + From 592b45c1c8bac6aad117b946756efe8ee7221795 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 12 Feb 2024 22:47:46 +0100 Subject: [PATCH 05/33] Improve symbolization --- dtaidistance/preprocessing.py | 2 +- dtaidistance/symbolization/alignment.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index e89fc6fd..d433ad1a 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -19,7 +19,7 @@ def differencing(series, smooth=None): :param series: Time series (must be numpy compatible) :param smooth: Smooth the series by removing the `smooth` percentage ([0-1]) of highest frequency. - :return: Differenced Numpy array + :return: Differenced Numpy array of length len(series) - 1 """ try: import numpy as np diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index a2010a88..72c47177 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -70,20 +70,23 @@ def hangover(self, symbols, threshold=4): sequences_idx.append(sequence_idx) return sequences, sequences_idx - def plot(self, series, sequences, sequences_idx, labels=None, filename=None): + def plot(self, series, sequences, sequences_idx, labels=None, filename=None, figsize=None): try: import matplotlib.pyplot as plt from matplotlib import gridspec except ImportError: raise MatplotlibException("No matplotlib available") + if figsize is None: + figsize = (12, 8) sc = SeriesContainer(series) - fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=(12, 8)) + fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=figsize) for r in range(series.shape[0]): + avg_value = series[r, :].mean() axs[r].plot(series[r, :]) if labels is not None: axs[r].set_ylabel(f"L={labels[r]}") for symbol, (fidx, lidx) in zip(sequences[r], sequences_idx[r]): - axs[r].text(fidx, 0, str(symbol)) + axs[r].text(fidx, avg_value, str(symbol)) if filename is not None: fig.savefig(filename) plt.close(fig) From f2dd0cfc99749f4903c41e1c66e6dc952d1f56f3 Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 8 Mar 2024 11:49:08 +0100 Subject: [PATCH 06/33] docs --- docs/conf.py | 6 ++-- docs/index.rst | 1 + docs/modules/preprocessing.rst | 3 ++ dtaidistance/dtw.py | 7 +++-- dtaidistance/dtw_ndim_visualisation.py | 3 +- dtaidistance/dtw_visualisation.py | 5 +-- dtaidistance/preprocessing.py | 17 ++++++---- dtaidistance/symbolization/alignment.py | 41 +++++++++++++++++++++++-- 8 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 docs/modules/preprocessing.rst diff --git a/docs/conf.py b/docs/conf.py index 11b9de50..16b67949 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,7 +51,7 @@ # General information about the project. project = 'DTAIDistance' -copyright = '2018-2022, Wannes Meert' +copyright = '2018-2024, Wannes Meert' author = 'Wannes Meert' # The version info for the project you're documenting, acts as replacement for @@ -59,9 +59,9 @@ # built documents. # # The short X.Y version. -version = '2.2' +version = '2.3' # The full version, including alpha/beta/rc tags. -release = '2.2.1' +release = '2.3.9' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 8ffb01b4..df62eda5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,7 @@ Source available on https://github.com/wannesm/dtaidistance. modules/ed modules/clustering modules/subsequence + modules/preprocessing Indices and tables diff --git a/docs/modules/preprocessing.rst b/docs/modules/preprocessing.rst new file mode 100644 index 00000000..76920a77 --- /dev/null +++ b/docs/modules/preprocessing.rst @@ -0,0 +1,3 @@ + +.. automodule:: dtaidistance.preprocessing + :members: diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 03a20349..c48e6da3 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -6,7 +6,7 @@ Dynamic Time Warping (DTW) :author: Wannes Meert -:copyright: Copyright 2017-2022 KU Leuven, DTAI Research Group. +:copyright: Copyright 2017-2024 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ @@ -183,7 +183,7 @@ def lb_keogh(s1, s2, window=None, max_dist=None, def ub_euclidean(s1, s2, inner_dist=innerdistance.default): - """ See ed.euclidean_distance""" + """ See :meth:`dtaidistance.ed.euclidean_distance`""" return ed.distance(s1, s2, inner_dist=inner_dist) @@ -208,7 +208,8 @@ def distance(s1, s2, :param s1: First sequence :param s2: Second sequence :param window: Only allow for maximal shifts from the two diagonals smaller than this number. - It includes the diagonal, meaning that an Euclidean distance is obtained by setting window=1. + It includes the diagonal, meaning that an Euclidean distance is obtained by setting + ``window=1.`` :param max_dist: Stop if the returned values will be larger than this value :param max_step: Do not allow steps larger than this value :param max_length_diff: Return infinity if length of two series is larger diff --git a/dtaidistance/dtw_ndim_visualisation.py b/dtaidistance/dtw_ndim_visualisation.py index f28a68bf..0190f48a 100644 --- a/dtaidistance/dtw_ndim_visualisation.py +++ b/dtaidistance/dtw_ndim_visualisation.py @@ -39,11 +39,12 @@ def plot_warping(s1, s2, path, filename=None): - """Plot the optimal warping between to sequences. + """Plot the optimal warping between two sequences. :param s1: From sequence. :param s2: To sequence. :param path: Optimal warping path. + Can be computed with methods like ``dtw_ndim.warping_path``. :param filename: Filename path (optional). """ if np is None: diff --git a/dtaidistance/dtw_visualisation.py b/dtaidistance/dtw_visualisation.py index 7a94bf14..c50df8f2 100644 --- a/dtaidistance/dtw_visualisation.py +++ b/dtaidistance/dtw_visualisation.py @@ -6,7 +6,7 @@ Dynamic Time Warping (DTW) visualisations. :author: Wannes Meert -:copyright: Copyright 2017-2022 KU Leuven, DTAI Research Group. +:copyright: Copyright 2017-2024 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ @@ -100,11 +100,12 @@ def plot_warp(from_s, to_s, new_s, path, filename=None, fig=None, axs=None): def plot_warping(s1, s2, path, filename=None, fig=None, axs=None, series_line_options=None, warping_line_options=None): - """Plot the optimal warping between to sequences. + """Plot the optimal warping between two sequences. :param s1: From sequence. :param s2: To sequence. :param path: Optimal warping path. + Can be computed with methods like :meth:`dtaidistance.dtw.warping_path`. :param filename: Filename path (optional). :param fig: Matplotlib Figure object :param axs: Array of Matplotlib axes.Axes objects (length == 2) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index d433ad1a..5bae1031 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -6,19 +6,22 @@ Preprocessing time series. :author: Wannes Meert -:copyright: Copyright 2021-2022 KU Leuven, DTAI Research Group. +:copyright: Copyright 2021-2024 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ from .exceptions import NumpyException, ScipyException -def differencing(series, smooth=None): +def differencing(series, smooth=None, diff_args=None): """Differencing series. :param series: Time series (must be numpy compatible) - :param smooth: Smooth the series by removing the `smooth` percentage ([0-1]) - of highest frequency. + :param smooth: Smooth the series by removing the highest frequencies. + The cut-off frequency is computed using the `smooth` argument. This + fraction (a number between 0.0 and 1.0) of the highest frequencies is + removed. + :param diff_args: Arguments to pass the numpy.diff :return: Differenced Numpy array of length len(series) - 1 """ try: @@ -34,18 +37,20 @@ def differencing(series, smooth=None): axis = 0 else: axis = 1 - series = np.diff(series, n=1, axis=axis) if smooth is not None: fs = 100 # sample rate, Hz cutoff = fs * smooth # cut off frequency, Hz nyq = 0.5 * fs # Nyquist frequency b, a = signal.butter(2, cutoff / nyq, btype='low', analog=False, output='ba') try: - series = signal.filtfilt(b, a, series, axis=axis) + series = signal.filtfilt(b, a, series, axis=axis, method="gust") except ValueError as exc: raise ValueError("Cannot apply smoothing, " "see the Scipy exception above to solve the problem " "or disable smoothing by setting smooth to None") from exc + if diff_args is None: + diff_args = {} + series = np.diff(series, n=1, axis=axis, **diff_args) return series diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index 72c47177..017c8da0 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -3,6 +3,7 @@ from ..util import SeriesContainer from ..subsequence.subsequencealignment import subsequence_alignment from ..exceptions import MatplotlibException +from ..similarity import distance_to_similarity class SymbolAlignment: @@ -15,6 +16,35 @@ def __init__(self, codebook): self.codebook = codebook self.use_c = False self.symbols = None + self._agg_fn = self.agg_min + self._agg_args = None + + def set_agg_min(self): + self._agg_fn = self.agg_min + self._agg_args = None + + def agg_min(self, patterns, max_value): + return np.argmin(patterns, axis=2).astype(int) + + def set_agg_prob(self, window=10): + self._agg_fn = self.agg_prob + self._agg_args = (window,) + + def agg_prob(self, patterns, max_value): + window = self._agg_args[0] + tlen = patterns.shape[1] + patterns = distance_to_similarity(patterns, r=max_value, method='exponential') + # Softmax + exp_x = np.exp(patterns - np.max(patterns, axis=2)[:, :, np.newaxis]) + logprobs = np.log(exp_x / np.sum(exp_x, axis=2)[:, :, np.newaxis]) + best = np.zeros((patterns.shape[0], tlen), dtype=int) + for ti in range(window): + best[:, ti] = np.argmax(logprobs[:, :ti, :].sum(axis=1), axis=1) + for ti in range(window, tlen-window): + best[:, ti] = np.argmax(logprobs[:, ti:ti+window, :].sum(axis=1), axis=1) + for ti in range(tlen-window, tlen): + best[:, ti] = np.argmax(logprobs[:, ti:, :].sum(axis=1), axis=1) + return best def align(self, series): """Perform alignment. @@ -25,15 +55,18 @@ def align(self, series): patterns = np.zeros((len(sc), sc.get_max_length(), len(self.codebook) + 1)) patterns[:, :, :] = np.inf + max_value = 0 for sidx in range(len(sc)): for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) sa = subsequence_alignment(medoidd, sc[sidx], use_c=self.use_c) for match in sa.kbest_matches(k=None): patterns[sidx, match.segment[0]:match.segment[1], midx] = match.value + max_value = max(max_value, match.value) patterns[:, :, len(self.codebook)] = 0 + print(f"{np.max(patterns)=}") patterns[:, :, len(self.codebook)] = np.max(patterns) + 1 - best_patterns = np.argmin(patterns, axis=2).astype(int) + best_patterns = self._agg_fn(patterns, max_value) self.symbols = best_patterns return best_patterns @@ -81,12 +114,14 @@ def plot(self, series, sequences, sequences_idx, labels=None, filename=None, fig sc = SeriesContainer(series) fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=figsize) for r in range(series.shape[0]): - avg_value = series[r, :].mean() + # avg_value = series[r, :].mean() + avg_value = series.min() + (series.max() - series.min()) / 2 axs[r].plot(series[r, :]) if labels is not None: axs[r].set_ylabel(f"L={labels[r]}") for symbol, (fidx, lidx) in zip(sequences[r], sequences_idx[r]): - axs[r].text(fidx, avg_value, str(symbol)) + axs[r].vlines(fidx, series.min(), series.max(), colors='k', alpha=0.2) + axs[r].text(fidx, avg_value, str(symbol), alpha=0.5) if filename is not None: fig.savefig(filename) plt.close(fig) From 706c7441f77d312f497a271fffc4f05f23c52cb2 Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 13 Mar 2024 11:40:54 +0100 Subject: [PATCH 07/33] improve symbolization --- dtaidistance/preprocessing.py | 77 +++++++++++++---- dtaidistance/similarity.py | 15 +++- .../subsequence/subsequencealignment.py | 13 ++- dtaidistance/symbolization/alignment.py | 86 ++++++++++++++++--- tests/test_symbolization.py | 4 +- 5 files changed, 162 insertions(+), 33 deletions(-) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index 5bae1031..580510fe 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -17,9 +17,9 @@ def differencing(series, smooth=None, diff_args=None): """Differencing series. :param series: Time series (must be numpy compatible) - :param smooth: Smooth the series by removing the highest frequencies. + :param smooth: Smooth the differenced series by removing the highest frequencies. The cut-off frequency is computed using the `smooth` argument. This - fraction (a number between 0.0 and 1.0) of the highest frequencies is + fraction (a number between 0.0 and 0.5) of the highest frequencies is removed. :param diff_args: Arguments to pass the numpy.diff :return: Differenced Numpy array of length len(series) - 1 @@ -28,29 +28,72 @@ def differencing(series, smooth=None, diff_args=None): import numpy as np except ImportError: raise NumpyException("Differencing requires Numpy") - try: - from scipy import signal - except ImportError: - raise ScipyException("Differencing requires Scipy") if isinstance(series, np.ndarray): if len(series.shape) == 1: axis = 0 else: axis = 1 - if smooth is not None: - fs = 100 # sample rate, Hz - cutoff = fs * smooth # cut off frequency, Hz - nyq = 0.5 * fs # Nyquist frequency - b, a = signal.butter(2, cutoff / nyq, btype='low', analog=False, output='ba') - try: - series = signal.filtfilt(b, a, series, axis=axis, method="gust") - except ValueError as exc: - raise ValueError("Cannot apply smoothing, " - "see the Scipy exception above to solve the problem " - "or disable smoothing by setting smooth to None") from exc if diff_args is None: diff_args = {} series = np.diff(series, n=1, axis=axis, **diff_args) + if smooth is not None: + series = smoothing(series, smooth) + return series + + +def smoothing(series, smooth): + """Smooth the series. + + :param series: Time series (must be numpy compatible) + :param smooth: Smooth the series by removing the highest frequencies. + The cut-off frequency is computed using the `smooth` argument. This + fraction (a number between 0.0 and 0.5) of the highest frequencies is + removed. + :return: Smoothed series as Numpy array + """ + try: + import numpy as np + except ImportError: + raise NumpyException("Smoothing requires Numpy") + try: + from scipy import signal + except ImportError: + raise ScipyException("Smoothing requires Scipy") + if isinstance(series, np.ndarray): + if len(series.shape) == 1: + axis = 0 + else: + axis = 1 + else: + axis = 0 + fs = 100 # sample rate, Hz + cutoff = fs * smooth # cut-off frequency, Hz + nyq = 0.5 * fs # Nyquist frequency + Wn = cutoff / nyq + try: + b, a = signal.butter(N=2, Wn=Wn, btype='low', analog=False, output='ba') + except ValueError as exc: + raise ValueError("Cannot construct filter, change the smoothing factor. " + f"Requires 0 0. + :param minlength: Minimal length of the matched sequence. + If k is set to None, matches with one value can occur if minlength is set to 1. :return: Yield an SAMatch object """ self.align() @@ -222,6 +224,11 @@ def kbest_matches(self, k=1, overlap=0): b, e = match.segment cur_overlap = min(overlap, e - b - 1) mb, me = best_idx + 1 - (e - b) + cur_overlap, best_idx + 1 + if ((minlength is not None and e-b+1 < minlength) or + (maxlength is not None and e-b+1 > maxlength)): + # Found sequence is too short or too long + matching[best_idx] = maxv + continue if np.isinf(np.max(matching[mb:me])): # No overlapping matches matching[best_idx] = maxv diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index 017c8da0..398f8ae5 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -1,3 +1,4 @@ +import math import numpy as np from ..util import SeriesContainer @@ -7,13 +8,17 @@ class SymbolAlignment: - def __init__(self, codebook): + def __init__(self, codebook, maxcompression=0.5, maxexpansion=2): """Translate a time series with continuous values to a list of discrete symbols based on motifs in a codebook. :param codebook: List of motifs. + :param maxcompression: Maximally allowed compression of a codeword for it to be recognized + :param maxexpansion: Maximally allowed expansion of a codeword for it to be recognized """ self.codebook = codebook + self.maxcompression = maxcompression + self.maxexpansion = maxexpansion self.use_c = False self.symbols = None self._agg_fn = self.agg_min @@ -60,16 +65,63 @@ def align(self, series): for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) sa = subsequence_alignment(medoidd, sc[sidx], use_c=self.use_c) - for match in sa.kbest_matches(k=None): - patterns[sidx, match.segment[0]:match.segment[1], midx] = match.value + for match in sa.kbest_matches(k=None, + minlength=math.floor(len(medoidd)*self.maxcompression), + maxlength=math.ceil(len(medoidd)*self.maxexpansion)): + patterns[sidx, match.segment[0]:match.segment[1]+1, midx] = match.value max_value = max(max_value, match.value) patterns[:, :, len(self.codebook)] = 0 - print(f"{np.max(patterns)=}") patterns[:, :, len(self.codebook)] = np.max(patterns) + 1 best_patterns = self._agg_fn(patterns, max_value) self.symbols = best_patterns return best_patterns + def align2(self, series): + """Perform alignment. + + Based on the Matching Pursuit algorithm. + + :param series: List of time series or a numpy array + """ + sc = SeriesContainer(series) + + for sidx in range(len(sc)): + curseries = sc[sidx].copy() + patterns = [] + for midx in range(len(self.codebook)): + medoidd = np.array(self.codebook[midx]) + sa = subsequence_alignment(medoidd, curseries, use_c=self.use_c) + for match in sa.kbest_matches(k=None, + minlength=math.floor(len(medoidd)*self.maxcompression), + maxlength=math.ceil(len(medoidd)*self.maxexpansion)): + patterns.append((midx, match.segment[0], match.segment[1]+1, + curseries[match.segment[0]:match.segment[1]+1])) + print(f"Series {sidx}: found {len(patterns)} patterns") + D = np.zeros((len(patterns), len(curseries))) + for i, (_, bi, ei, ts) in enumerate(patterns): + D[i, bi:ei] = ts + + noword = len(self.codebook) + best_patterns = np.full(series.shape, noword, dtype=int) + bestpatvalue = np.inf + its = 0 + while bestpatvalue > 0: + print(f"Iteration {sidx} -- {its}") + dotprod = np.einsum('j,kj->k', curseries, D) + bestpatidx = np.argmax(dotprod) + bestpatvalue = dotprod[bestpatidx] + if bestpatvalue == 0: + break + pattern = patterns[bestpatidx] + freeidxs = best_patterns[sidx, pattern[1]:pattern[2]] == noword + best_patterns[sidx, pattern[1]:pattern[2]][freeidxs] = pattern[0] + curseries[pattern[1]:pattern[2]] = 0 + D[bestpatidx, :] = 0 + its += 1 + + self.symbols = best_patterns + return best_patterns + def align_fast(self, series): use_c = self.use_c self.use_c = True @@ -103,7 +155,8 @@ def hangover(self, symbols, threshold=4): sequences_idx.append(sequence_idx) return sequences, sequences_idx - def plot(self, series, sequences, sequences_idx, labels=None, filename=None, figsize=None): + def plot(self, series, sequences, sequences_idx, ylabels=None, filename=None, figsize=None, + xlimits=None, symbollabels=None): try: import matplotlib.pyplot as plt from matplotlib import gridspec @@ -113,15 +166,28 @@ def plot(self, series, sequences, sequences_idx, labels=None, filename=None, fig figsize = (12, 8) sc = SeriesContainer(series) fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=figsize) + if len(sc) == 1: + axs = [axs] for r in range(series.shape[0]): # avg_value = series[r, :].mean() avg_value = series.min() + (series.max() - series.min()) / 2 - axs[r].plot(series[r, :]) - if labels is not None: - axs[r].set_ylabel(f"L={labels[r]}") + if xlimits is None: + axs[r].plot(series[r, :]) + else: + axs[r].plot(series[r, xlimits[0]:xlimits[1]]) + if ylabels is not None: + axs[r].set_ylabel(f"L={ylabels[r]}") for symbol, (fidx, lidx) in zip(sequences[r], sequences_idx[r]): - axs[r].vlines(fidx, series.min(), series.max(), colors='k', alpha=0.2) - axs[r].text(fidx, avg_value, str(symbol), alpha=0.5) + if xlimits is None: + delta = 0 + elif xlimits[0] <= fidx <= xlimits[1]: + delta = xlimits[0] + else: + continue + axs[r].vlines(fidx-delta, series.min(), series.max(), colors='k', alpha=0.2) + if symbollabels is not None: + symbol = symbollabels(symbol) + axs[r].text(fidx-delta, avg_value, str(symbol), alpha=0.5) if filename is not None: fig.savefig(filename) plt.close(fig) diff --git a/tests/test_symbolization.py b/tests/test_symbolization.py index 041d0a25..63e05d9e 100644 --- a/tests/test_symbolization.py +++ b/tests/test_symbolization.py @@ -82,6 +82,6 @@ def test_trace1(): sequences, sequences_idx = sa.hangover(symbols, threshold=4) if directory is not None and not dtwvis.test_without_visualization(): sa.plot(series, sequences, sequences_idx, - labels=labels, filename=directory / "series.png") + ylabels=labels, filename=directory / "series.png") sa.plot(seriesd, sequences, sequences_idx, - labels=labels, filename=directory / "seriesd.png") + ylabels=labels, filename=directory / "seriesd.png") From c866f697444b2d22e29cecc350be28c576b95a3d Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 15 Mar 2024 18:49:13 +0100 Subject: [PATCH 08/33] symbolization --- dtaidistance/preprocessing.py | 39 +++++++++++++++++++ .../subsequence/subsequencealignment.py | 6 ++- dtaidistance/symbolization/alignment.py | 36 +++++++++++------ 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index 580510fe..e2377431 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -97,6 +97,45 @@ def logdomain(series): return series +def mixedlinearlogdomain(series, c=10): + """Transform to a mixture between linear and log domain + (and retain the sign of the signal). + + The transformation is (for positive values): + x if x<=c + c+ln(x-c+1) if x>c + + :param series: Time series (must be numpy compatible) + :param c: Switch between linear to log domain at this value, should be <= 1. + If two numbers are given as a tuple, the first one is used for positive + values, the second for negative values. + """ + try: + import numpy as np + except ImportError: + raise NumpyException("Transforming to log domain requires Numpy") + + if type(c) in [tuple, list]: + pos = np.heaviside(series, 1) + seriesp = pos*series + seriesn = (1-pos)*np.abs(series) + cc = c[0] + step = np.heaviside(seriesp - cc, 1) + seriesp = (1 - step) * seriesp + step * (cc + np.log1p(step * (seriesp - cc))) + cc = -c[1] + step = np.heaviside(seriesn - cc, 1) + seriesn = (1 - step) * seriesn + step * (cc + np.log1p(step * (seriesn - cc))) + series = -seriesn + seriesp + else: + sign = np.sign(series) + series = np.abs(series) + step = np.heaviside(series-c, 1) + # should be vectorized + # step is in log1p to avoid nan + series = sign * ((1-step)*series + step*(c+np.log1p(step*(series-c)))) + return series + + def znormal(series): """Z-normalize the time series. diff --git a/dtaidistance/subsequence/subsequencealignment.py b/dtaidistance/subsequence/subsequencealignment.py index 16e92c96..87f420db 100644 --- a/dtaidistance/subsequence/subsequencealignment.py +++ b/dtaidistance/subsequence/subsequencealignment.py @@ -48,14 +48,16 @@ dtw_cc = None -def subsequence_alignment(query, series, use_c=False): +def subsequence_alignment(query, series, penalty=0.1, use_c=False): """See SubsequenceAligment. :param query: :param series: + :param penalty: + :param use_c: :return: """ - sa = SubsequenceAlignment(query, series, use_c=use_c) + sa = SubsequenceAlignment(query, series, penalty=penalty, use_c=use_c) sa.align() return sa diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index 398f8ae5..334b0bf8 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -54,6 +54,8 @@ def agg_prob(self, patterns, max_value): def align(self, series): """Perform alignment. + For each time point, select the best matching motif. + :param series: List of time series or a numpy array """ sc = SeriesContainer(series) @@ -88,6 +90,7 @@ def align2(self, series): for sidx in range(len(sc)): curseries = sc[sidx].copy() patterns = [] + max_value = 0 for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) sa = subsequence_alignment(medoidd, curseries, use_c=self.use_c) @@ -95,28 +98,39 @@ def align2(self, series): minlength=math.floor(len(medoidd)*self.maxcompression), maxlength=math.ceil(len(medoidd)*self.maxexpansion)): patterns.append((midx, match.segment[0], match.segment[1]+1, - curseries[match.segment[0]:match.segment[1]+1])) - print(f"Series {sidx}: found {len(patterns)} patterns") - D = np.zeros((len(patterns), len(curseries))) - for i, (_, bi, ei, ts) in enumerate(patterns): - D[i, bi:ei] = ts + curseries[match.segment[0]:match.segment[1]+1], match.value)) + max_value = max(max_value, match.value) + # print(f"Series {sidx}: found {len(patterns)} patterns") + # D = np.zeros((len(patterns), len(curseries))) + # for i, (_, bi, ei, ts) in enumerate(patterns): + # D[i, bi:ei] = ts + D = np.zeros(len(patterns)) + L = np.zeros(len(patterns)) + for i, (_, bi, ei, ts, v) in enumerate(patterns): + D[i] = v + L[i] = ei - bi + 1 + # Trade-off between length and similarity + S = distance_to_similarity(D, r=max_value, method='exponential') * L noword = len(self.codebook) best_patterns = np.full(series.shape, noword, dtype=int) bestpatvalue = np.inf its = 0 while bestpatvalue > 0: - print(f"Iteration {sidx} -- {its}") - dotprod = np.einsum('j,kj->k', curseries, D) - bestpatidx = np.argmax(dotprod) - bestpatvalue = dotprod[bestpatidx] + # print(f"Iteration {sidx} -- {its}") + # dotprod = np.einsum('j,kj->k', curseries, D) + # bestpatidx = np.argmax(dotprod) + # bestpatvalue = dotprod[bestpatidx] + bestpatidx = np.argmax(S) + bestpatvalue = S[bestpatidx] if bestpatvalue == 0: break pattern = patterns[bestpatidx] freeidxs = best_patterns[sidx, pattern[1]:pattern[2]] == noword best_patterns[sidx, pattern[1]:pattern[2]][freeidxs] = pattern[0] - curseries[pattern[1]:pattern[2]] = 0 - D[bestpatidx, :] = 0 + # curseries[pattern[1]:pattern[2]] = 0 + # D[bestpatidx, :] = 0 + S[bestpatidx] = 0 its += 1 self.symbols = best_patterns From 475ff9669f8734980c382a9cb048687e5ea97bee Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 15 Mar 2024 18:54:23 +0100 Subject: [PATCH 09/33] innerdist to ndim --- dtaidistance/dtw_ndim.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dtaidistance/dtw_ndim.py b/dtaidistance/dtw_ndim.py index 5637877a..59c54fed 100644 --- a/dtaidistance/dtw_ndim.py +++ b/dtaidistance/dtw_ndim.py @@ -19,6 +19,7 @@ from .dtw import _check_library, SeriesContainer, _distance_matrix_idxs, distances_array_to_matrix,\ _distance_matrix_length, _complete_block from . import util_numpy +from . import innerdistance from .exceptions import NumpyException try: @@ -86,7 +87,8 @@ def ub_euclidean(s1, s2): def distance(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, - use_c=False, use_pruning=False, only_ub=False): + use_c=False, use_pruning=False, only_ub=False, + inner_dist=innerdistance.default): """(Dependent) Dynamic Time Warping using multidimensional sequences. Assumes the first dimension to be the sequence item index, and the second @@ -143,7 +145,8 @@ def distance(s1, s2, window=None, max_dist=None, penalty=penalty, psi=psi, use_pruning=use_pruning, - only_ub=only_ub) + only_ub=only_ub, + inner_dist=inner_dist) if np is None: raise NumpyException("Numpy is required for the dtw_ndim.distance method " "(Numpy is not required for the distance_fast method that uses the C library") @@ -168,6 +171,7 @@ def distance(s1, s2, window=None, max_dist=None, penalty = 0 else: penalty *= penalty + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) if psi is None: psi = 0 length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) @@ -203,7 +207,8 @@ def distance(s1, s2, window=None, max_dist=None, if psi != 0 and j_start == 0 and i < psi: dtw[i1 * length] = 0 for j in range(j_start, j_end): - d = np.sum((s1[i] - s2[j]) ** 2) + # d = np.sum((s1[i] - s2[j]) ** 2) + d = idist_fn(s1[i], s2[j]) if d > max_step: continue assert j + 1 - skip >= 0 @@ -237,11 +242,13 @@ def distance(s1, s2, window=None, max_dist=None, d = math.sqrt(d) if max_dist and d > max_dist: d = inf + d = result_fn(d) return d def distance_fast(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False): + max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False, + inner_dist=innerdistance.default): """Fast C version of :meth:`distance`. Note: the series are expected to be arrays of the type ``double``. @@ -268,7 +275,8 @@ def distance_fast(s1, s2, window=None, max_dist=None, penalty=penalty, psi=psi, use_pruning=use_pruning, - only_ub=only_ub) + only_ub=only_ub, + inner_dist=inner_dist) return d From 56b397827f307fb279dcdd540545647d33466908 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 17 Mar 2024 16:50:38 +0100 Subject: [PATCH 10/33] inner dist --- dtaidistance/dtw.py | 10 ++++++---- dtaidistance/dtw_ndim.py | 12 ++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index c48e6da3..728704d0 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -702,7 +702,8 @@ def distance_matrix_wrapper(seqs, **kwargs): def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, compact=False, parallel=False, - use_c=False, use_mp=False, show_progress=False, only_triu=False): + use_c=False, use_mp=False, show_progress=False, only_triu=False, + inner_dist=innerdistance.default): """Distance matrix for all sequences in s. :param s: Iterable of series @@ -753,7 +754,8 @@ def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, 'max_length_diff': max_length_diff, 'penalty': penalty, 'psi': psi, - 'use_pruning': use_pruning + 'use_pruning': use_pruning, + 'inner_dist': inner_dist } s = SeriesContainer.wrap(s) if max_length_diff is None: @@ -928,7 +930,7 @@ def _distance_matrix_length(block, nb_series): def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, compact=False, parallel=True, use_mp=False, - only_triu=False): + only_triu=False, inner_dist=innerdistance.default): """Same as :meth:`distance_matrix` but with different defaults to choose the fast parallized C version (use_c = True and parallel = True). @@ -947,7 +949,7 @@ def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=No max_step=max_step, penalty=penalty, psi=psi, block=block, compact=compact, parallel=parallel, use_c=True, use_mp=use_mp, show_progress=False, - only_triu=only_triu) + only_triu=only_triu, inner_dist=inner_dist) def warping_path(from_s, to_s, include_distance=False, **kwargs): diff --git a/dtaidistance/dtw_ndim.py b/dtaidistance/dtw_ndim.py index 59c54fed..67f60ba2 100644 --- a/dtaidistance/dtw_ndim.py +++ b/dtaidistance/dtw_ndim.py @@ -330,7 +330,8 @@ def distance_matrix_python(s, block=None, show_progress=False, max_length_diff=N def distance_matrix(s, ndim=None, max_dist=None, use_pruning=False, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, compact=False, parallel=False, - use_c=False, use_mp=False, show_progress=False, only_triu=False): + use_c=False, use_mp=False, show_progress=False, only_triu=False, + inner_dist=innerdistance.default): """Distance matrix for all n-dimensional sequences in s. This method returns the dependent DTW (DTW_D) [1] distance between two @@ -392,7 +393,8 @@ def distance_matrix(s, ndim=None, max_dist=None, use_pruning=False, max_length_d 'max_length_diff': max_length_diff, 'penalty': penalty, 'psi': psi, - 'use_pruning': use_pruning + 'use_pruning': use_pruning, + 'inner_dist': inner_dist } s = SeriesContainer.wrap(s) if ndim is None: @@ -451,13 +453,15 @@ def distance_matrix(s, ndim=None, max_dist=None, use_pruning=False, max_length_d def distance_matrix_fast(s, ndim=None, max_dist=None, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, - block=None, compact=False, parallel=True, only_triu=False): + block=None, compact=False, parallel=True, only_triu=False, + inner_dist=innerdistance.default): """Fast C version of :meth:`distance_matrix`.""" _check_library(raise_exception=True, include_omp=parallel) return distance_matrix(s, ndim=ndim, max_dist=max_dist, max_length_diff=max_length_diff, window=window, max_step=max_step, penalty=penalty, psi=psi, block=block, compact=compact, parallel=parallel, - use_c=True, show_progress=False, only_triu=only_triu) + use_c=True, show_progress=False, only_triu=only_triu, + inner_dist=inner_dist) def warping_path(from_s, to_s, **kwargs): From 9d69aaa78b457bc29a1f3d88f6bc3509e932dc91 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 18 Mar 2024 12:56:31 +0100 Subject: [PATCH 11/33] symbolization --- dtaidistance/symbolization/alignment.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index 334b0bf8..bb28d80a 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -78,14 +78,21 @@ def align(self, series): self.symbols = best_patterns return best_patterns - def align2(self, series): + def align2(self, series, max_overlap=None): """Perform alignment. - Based on the Matching Pursuit algorithm. + Inspired on the Matching Pursuit algorithm. + But only one motif is matched to a subsequence. No aggregation within the same subsequence. :param series: List of time series or a numpy array + :param max_overlap: Maximal overlap when matching codewords. + If not given, this is based on maxcompression and maxexpansion. """ sc = SeriesContainer(series) + noword = len(self.codebook) + best_patterns = np.full(series.shape, noword, dtype=int) + if max_overlap is None: + max_overlap = max(self.maxcompression, 1./self.maxexpansion) for sidx in range(len(sc)): curseries = sc[sidx].copy() @@ -103,17 +110,19 @@ def align2(self, series): # print(f"Series {sidx}: found {len(patterns)} patterns") # D = np.zeros((len(patterns), len(curseries))) # for i, (_, bi, ei, ts) in enumerate(patterns): - # D[i, bi:ei] = ts + # D[i, bi:ei] = ts # should be normalized for matching pursuit if we use the factors D = np.zeros(len(patterns)) - L = np.zeros(len(patterns)) + L = np.zeros(len(patterns), dtype=int) + B = np.zeros(len(patterns), dtype=int) + E = np.zeros(len(patterns), dtype=int) for i, (_, bi, ei, ts, v) in enumerate(patterns): D[i] = v L[i] = ei - bi + 1 + B[i] = bi + E[i] = ei + 1 # Use next position for length and overlap computation # Trade-off between length and similarity S = distance_to_similarity(D, r=max_value, method='exponential') * L - noword = len(self.codebook) - best_patterns = np.full(series.shape, noword, dtype=int) bestpatvalue = np.inf its = 0 while bestpatvalue > 0: @@ -128,6 +137,10 @@ def align2(self, series): pattern = patterns[bestpatidx] freeidxs = best_patterns[sidx, pattern[1]:pattern[2]] == noword best_patterns[sidx, pattern[1]:pattern[2]][freeidxs] = pattern[0] + # Remove patterns with more overlap than max_overlap + overlaps = (np.maximum(0, np.minimum(E[bestpatidx], E) - + np.maximum(B[bestpatidx], B)) / L[bestpatidx]) > max_overlap + S[overlaps] = 0 # curseries[pattern[1]:pattern[2]] = 0 # D[bestpatidx, :] = 0 S[bestpatidx] = 0 From 86472c0d8f47b4f664aae523494e05d6e07812d7 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 18 Mar 2024 14:43:52 +0100 Subject: [PATCH 12/33] distance to similarity improvements --- dtaidistance/similarity.py | 75 ++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index e725baaf..172a6634 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -4,7 +4,7 @@ np = None -def distance_to_similarity(D, r=None, method='exponential', return_params=False): +def distance_to_similarity(D, r=None, a=None, method='exponential', return_params=False, cover_quantile=False): """Transform a distance matrix to a similarity matrix. The available methods are: @@ -12,7 +12,7 @@ def distance_to_similarity(D, r=None, method='exponential', return_params=False) r is max(D) if not given - Gaussian: e^(-D^2 / r^2) r is max(D) if not given - - Reciprocal: 1 / (r + D) + - Reciprocal: 1 / (r + D*a) r is 1 if not given - Reverse: r - D r is min(D) + max(D) if not given @@ -29,21 +29,43 @@ def distance_to_similarity(D, r=None, method='exponential', return_params=False) :param D: The distance matrix :param r: A scaling or smoothing parameter. :param method: One of 'exponential', 'gaussian', 'reciprocal', 'reverse' + :param return_params: Return the value used for parameter r + :param cover_quantile: Compute r such that the function covers the `cover_quantile` fraction of the data. + Expects a value in [0,1]. If a tuple (quantile, value) is given, then r and a are set such that + at quantile the given value is reached (if not given, value is 0.01). :return: Similarity matrix S """ + if cover_quantile is not False: + if type(cover_quantile) in [tuple, list]: + cover_quantile, cover_quantile_target = cover_quantile + else: + cover_quantile_target = 0.01 + else: + cover_quantile_target = None method = method.lower() if method == 'exponential': if r is None: - r = np.max(D) + if cover_quantile is False: + r = np.max(D) + else: + r = -np.quantile(D, cover_quantile) / np.log(cover_quantile_target) S = np.exp(-D / r) elif method == 'gaussian': if r is None: - r = np.max(D) + if cover_quantile is False: + r = np.max(D) + else: + r = np.sqrt(-np.quantile(D, cover_quantile) ** 2 / np.log(cover_quantile_target)) S = np.exp(-np.power(D, 2) / r**2) elif method == 'reciprocal': if r is None: r = 1 - S = 1 / (r + D) + if a is None: + if cover_quantile is False: + a = 1 + else: + a = (1 - cover_quantile_target * r) / (cover_quantile_target * np.quantile(D, cover_quantile)) + S = 1 / (r + D*a) elif method == 'reverse': if r is None: r = np.min(D) + np.max(D) @@ -56,12 +78,14 @@ def distance_to_similarity(D, r=None, method='exponential', return_params=False) return S -def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False, keep_sign=False): +def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False, keep_sign=False, + cover_quantile=False): """Squash a function monotonically to a range between 0 and 1. The available methods are: - Logistic: 1 / (1 + e^(-(X-x0) / r) - - Gaussian: e^(-(X-x0)^2 / r^2) + - Gaussian: 1 - e^(-(X-x0)^2 / r^2) + - Exponential: 1 - e^(-(X-x0) / r) Example usage:: @@ -77,10 +101,20 @@ def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False :param X: Distances values :param r: The slope of the squashing (see the formula above) :param x0: The midpoint of the squashing (see the formula above) - :param method: The choice of sqaush function: logistic or gaussian - :param keep_sign: Negative values should stay negative. + :param method: The choice of sqaush function: logistic, gaussian, or exponential + :param keep_sign: Negative values should stay negative :param return_params: Also return the used values for r and X0 + :param cover_quantile: Compute r such that the function covers the `cover_quantile` fraction of the data. + Expects a value in [0,1]. If a tuple (quantile, value) is given, then r and a are set such that + at quantile the given value is reached (if not given, value is 0.01). """ + if cover_quantile is not False: + if type(cover_quantile) in [tuple, list]: + cover_quantile, cover_quantile_target = cover_quantile + else: + cover_quantile_target = 0.99 + else: + cover_quantile_target = None result = None if keep_sign: Xs = np.sign(X) @@ -91,18 +125,37 @@ def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False if method == "gaussian": x0 = 0 # not supported for gaussian if r is None: - r = 1 + if cover_quantile is False: + r = 1 + else: + r = np.sqrt(-(np.quantile(X, cover_quantile)-x0)**2/np.log(1-cover_quantile_target)) if base is None: result = 1 - np.exp(-np.power(X - x0, 2) / r**2) Xz = 1 - np.exp(-np.power(0 - x0, 2) / r**2) else: result = 1 - np.power(base, -np.power(X - x0, 2) / r**2) Xz = 1 - np.power(base, -np.power(0 - x0, 2) / r**2) + if method == "exponential": + x0 = 0 # not supported for exponential + if r is None: + if cover_quantile is False: + r = 1 + else: + r = -(np.quantile(X, cover_quantile)-x0)/np.log(1-cover_quantile_target) + if base is None: + result = 1 - np.exp(-(X - x0) / r) + Xz = 1 - np.exp(-(0 - x0) / r) + else: + result = 1 - np.power(base, -(X - x0) / r) + Xz = 1 - np.power(base, -(0 - x0) / r) elif method == "logistic": if x0 is None: x0 = np.mean(X) if r is None: - r = x0 / 6 + if cover_quantile is False: + r = x0 / 6 + else: + r = -(np.quantile(X, cover_quantile)-x0) / np.log(1/cover_quantile_target-1) if base is None: result = 1 / (1 + np.exp(-(X - x0) / r)) Xz = 1 / (1 + np.exp(-(0 - x0) / r)) From 58ab65edd89688a33ad7e2e241fb9d9f7ef78513 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 18 Mar 2024 16:47:40 +0100 Subject: [PATCH 13/33] distance to similarity improvements --- dtaidistance/similarity.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 172a6634..572bc654 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -31,15 +31,15 @@ def distance_to_similarity(D, r=None, a=None, method='exponential', return_param :param method: One of 'exponential', 'gaussian', 'reciprocal', 'reverse' :param return_params: Return the value used for parameter r :param cover_quantile: Compute r such that the function covers the `cover_quantile` fraction of the data. - Expects a value in [0,1]. If a tuple (quantile, value) is given, then r and a are set such that - at quantile the given value is reached (if not given, value is 0.01). + Expects a value in [0,1]. If a tuple (quantile, value) is given, then r (and a) are set such that + at the quantile the given value is reached (if not given, value is 1-quantile). :return: Similarity matrix S """ if cover_quantile is not False: if type(cover_quantile) in [tuple, list]: cover_quantile, cover_quantile_target = cover_quantile else: - cover_quantile_target = 0.01 + cover_quantile_target = 1 - cover_quantile else: cover_quantile_target = None method = method.lower() @@ -105,14 +105,14 @@ def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False :param keep_sign: Negative values should stay negative :param return_params: Also return the used values for r and X0 :param cover_quantile: Compute r such that the function covers the `cover_quantile` fraction of the data. - Expects a value in [0,1]. If a tuple (quantile, value) is given, then r and a are set such that - at quantile the given value is reached (if not given, value is 0.01). + Expects a value in [0,1]. If a tuple (quantile, value) is given, then r (and a) are set such that + at the quantile the given value is reached (if not given, value is quantile). """ if cover_quantile is not False: if type(cover_quantile) in [tuple, list]: cover_quantile, cover_quantile_target = cover_quantile else: - cover_quantile_target = 0.99 + cover_quantile_target = cover_quantile else: cover_quantile_target = None result = None @@ -144,10 +144,10 @@ def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False r = -(np.quantile(X, cover_quantile)-x0)/np.log(1-cover_quantile_target) if base is None: result = 1 - np.exp(-(X - x0) / r) - Xz = 1 - np.exp(-(0 - x0) / r) + Xz = 1 - np.exp(x0 / r) else: result = 1 - np.power(base, -(X - x0) / r) - Xz = 1 - np.power(base, -(0 - x0) / r) + Xz = 1 - np.power(base, x0 / r) elif method == "logistic": if x0 is None: x0 = np.mean(X) From f521bf462db846da5620aaddaefa6b035dcb2ea3 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 26 Mar 2024 09:59:19 +0100 Subject: [PATCH 14/33] change default align method --- dtaidistance/similarity.py | 1 + dtaidistance/symbolization/alignment.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dtaidistance/similarity.py b/dtaidistance/similarity.py index 572bc654..b78add5f 100644 --- a/dtaidistance/similarity.py +++ b/dtaidistance/similarity.py @@ -100,6 +100,7 @@ def squash(X, r=None, base=None, x0=None, method="logistic", return_params=False :param X: Distances values :param r: The slope of the squashing (see the formula above) + :param base: Use this value as base instead of e :param x0: The midpoint of the squashing (see the formula above) :param method: The choice of sqaush function: logistic, gaussian, or exponential :param keep_sign: Negative values should stay negative diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index bb28d80a..a9f2f2f1 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -25,6 +25,7 @@ def __init__(self, codebook, maxcompression=0.5, maxexpansion=2): self._agg_args = None def set_agg_min(self): + """Set mininum aggregation for the align2 method.""" self._agg_fn = self.agg_min self._agg_args = None @@ -32,6 +33,7 @@ def agg_min(self, patterns, max_value): return np.argmin(patterns, axis=2).astype(int) def set_agg_prob(self, window=10): + """Set probabilistic aggregation for the align2 method.""" self._agg_fn = self.agg_prob self._agg_args = (window,) @@ -51,10 +53,10 @@ def agg_prob(self, patterns, max_value): best[:, ti] = np.argmax(logprobs[:, ti:, :].sum(axis=1), axis=1) return best - def align(self, series): + def align2(self, series): """Perform alignment. - For each time point, select the best matching motif. + For each time point, select the best matching motif and aggregate the results. :param series: List of time series or a numpy array """ @@ -78,16 +80,16 @@ def align(self, series): self.symbols = best_patterns return best_patterns - def align2(self, series, max_overlap=None): + def align(self, series, max_overlap=None): """Perform alignment. - Inspired on the Matching Pursuit algorithm. - But only one motif is matched to a subsequence. No aggregation within the same subsequence. + Only one motif is matched to a subsequence. No aggregation within the same subsequence. :param series: List of time series or a numpy array :param max_overlap: Maximal overlap when matching codewords. If not given, this is based on maxcompression and maxexpansion. """ + # Inspired on the Matching Pursuit algorithm. sc = SeriesContainer(series) noword = len(self.codebook) best_patterns = np.full(series.shape, noword, dtype=int) From 90c151660d65946dc67f044ec1885ac7ec3b779a Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 28 Mar 2024 21:36:01 +0100 Subject: [PATCH 15/33] remove separate ndim implementations --- dtaidistance/dtw.py | 93 ++++-- dtaidistance/dtw_ndim.py | 286 ++---------------- dtaidistance/ed.py | 6 +- dtaidistance/innerdistance.py | 7 + dtaidistance/subsequence/subsequencesearch.py | 1 + dtaidistance/util.py | 5 + 6 files changed, 104 insertions(+), 294 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 728704d0..04f67445 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -156,13 +156,13 @@ def __str__(self): def lb_keogh(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, use_c=False, inner_dist=innerdistance.default): + max_step=None, max_length_diff=None, use_c=False, use_ndim=False, inner_dist=innerdistance.default): """Lowerbound LB_KEOGH""" if use_c: return dtw_cc.lb_keogh(s1, s2, window=window, max_dist=max_dist, max_step=max_step, inner_dist=inner_dist) if window is None: window = max(len(s1), len(s2)) - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) t = 0 imin_diff = max(0, len(s1) - len(s2)) + window - 1 @@ -191,7 +191,7 @@ def distance(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, use_c=False, use_pruning=False, only_ub=False, - inner_dist=innerdistance.default): + inner_dist=innerdistance.default, use_ndim=False): """ Dynamic Time Warping. @@ -245,7 +245,8 @@ def distance(s1, s2, psi=psi, use_pruning=use_pruning, only_ub=only_ub, - inner_dist=inner_dist) + inner_dist=inner_dist, + use_ndim=use_ndim) r, c = len(s1), len(s2) if max_length_diff is not None and abs(r - c) > max_length_diff: return inf @@ -267,7 +268,7 @@ def distance(s1, s2, penalty = 0 else: penalty *= penalty - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(psi) length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) # print("length (py) = {}".format(length)) @@ -345,7 +346,7 @@ def distance(s1, s2, def distance_fast(s1, s2, window=None, max_dist=None, max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False, - inner_dist=innerdistance.default): + inner_dist=innerdistance.default, use_ndim=False): """Same as :meth:`distance` but with different defaults to chose the fast C-based version of the implementation (use_c = True). @@ -358,16 +359,28 @@ def distance_fast(s1, s2, window=None, max_dist=None, s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) # Move data to C library - d = dtw_cc.distance(s1, s2, - window=window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist) + if use_ndim is False: + d = dtw_cc.distance(s1, s2, + window=window, + max_dist=max_dist, + max_step=max_step, + max_length_diff=max_length_diff, + penalty=penalty, + psi=psi, + use_pruning=use_pruning, + only_ub=only_ub, + inner_dist=inner_dist) + else: + d = dtw_cc.distance_ndim(s1, s2, + window=window, + max_dist=max_dist, + max_step=max_step, + max_length_diff=max_length_diff, + penalty=penalty, + psi=psi, + use_pruning=use_pruning, + only_ub=only_ub, + inner_dist=inner_dist) return d @@ -375,10 +388,18 @@ def _distance_with_params(t): return distance(t[0], t[1], **t[2]) +def _distance_with_params_ndim(t): + return distance(t[0], t[1], use_ndim=True, **t[2]) + + def _distance_c_with_params(t): return dtw_cc.distance(t[0], t[1], **t[2]) +def _distance_c_with_params_ndim(t): + return dtw_cc.distance_ndim(t[0], t[1], **t[2]) + + def _process_psi_arg(psi): psi_1b = 0 psi_1e = 0 @@ -428,7 +449,7 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, if np is None: raise NumpyException("Numpy is required for the warping_paths method") # Always use ndim to use np functions - cost, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=True) + cost, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) r, c = len(s1), len(s2) if max_length_diff is not None and abs(r - c) > max_length_diff: return inf @@ -703,7 +724,7 @@ def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, compact=False, parallel=False, use_c=False, use_mp=False, show_progress=False, only_triu=False, - inner_dist=innerdistance.default): + inner_dist=innerdistance.default, use_ndim=False): """Distance matrix for all sequences in s. :param s: Iterable of series @@ -758,6 +779,10 @@ def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, 'inner_dist': inner_dist } s = SeriesContainer.wrap(s) + if use_ndim: + ndim = s.detected_ndim + else: + ndim = 1 if max_length_diff is None: max_length_diff = inf dists = None @@ -771,29 +796,43 @@ def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, if use_c and parallel and not use_mp and dtw_cc_omp is not None: logger.info("Compute distances in C (parallel=OMP)") dist_opts['block'] = block - dists = dtw_cc_omp.distance_matrix(s, **dist_opts) + if use_ndim: + dists = dtw_cc_omp.distance_matrix_ndim(s, ndim, **dist_opts) + else: + dists = dtw_cc_omp.distance_matrix(s, **dist_opts) elif use_c and parallel and (dtw_cc_omp is None or use_mp): logger.info("Compute distances in C (parallel=MP)") idxs = _distance_matrix_idxs(block, len(s)) + if use_ndim: + fn = _distance_c_with_params_ndim + else: + fn = _distance_c_with_params with mp.Pool() as p: - dists = p.map(_distance_c_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) + dists = p.map(fn, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) elif use_c and not parallel: logger.info("Compute distances in C (parallel=No)") dist_opts['block'] = block - dists = dtw_cc.distance_matrix(s, **dist_opts) + if use_ndim: + dists = dtw_cc.distance_matrix_ndim(s, ndim, **dist_opts) + else: + dists = dtw_cc.distance_matrix(s, **dist_opts) elif not use_c and parallel: logger.info("Compute distances in Python (parallel=MP)") idxs = _distance_matrix_idxs(block, len(s)) + if use_ndim: + fn = _distance_with_params_ndim + else: + fn = _distance_with_params with mp.Pool() as p: - dists = p.map(_distance_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) + dists = p.map(fn, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) elif not use_c and not parallel: logger.info("Compute distances in Python (parallel=No)") dists = distance_matrix_python(s, block=block, show_progress=show_progress, - dist_opts=dist_opts) + dist_opts=dist_opts, use_ndim=use_ndim) else: raise Exception(f'Unsupported combination of: parallel={parallel}, ' @@ -844,7 +883,7 @@ def distance_array_index(a, b, nb_series): return idx -def distance_matrix_python(s, block=None, show_progress=False, dist_opts=None): +def distance_matrix_python(s, block=None, show_progress=False, dist_opts=None, use_ndim=False): if dist_opts is None: dist_opts = {} dists = array.array('d', [inf] * _distance_matrix_length(block, len(s))) @@ -859,7 +898,7 @@ def distance_matrix_python(s, block=None, show_progress=False, dist_opts=None): else: it_c = range(block[1][0], min(len(s), block[1][1])) for c in it_c: - dists[idx] = distance(s[r], s[c], **dist_opts) + dists[idx] = distance(s[r], s[c], use_ndim=use_ndim, **dist_opts) idx += 1 return dists @@ -952,9 +991,9 @@ def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=No only_triu=only_triu, inner_dist=inner_dist) -def warping_path(from_s, to_s, include_distance=False, **kwargs): +def warping_path(from_s, to_s, include_distance=False, use_ndim=False, **kwargs): """Compute warping path between two sequences.""" - dist, paths = warping_paths(from_s, to_s, **kwargs) + dist, paths = warping_paths(from_s, to_s, use_ndim=use_ndim, **kwargs) path = best_path(paths) if include_distance: return path, dist diff --git a/dtaidistance/dtw_ndim.py b/dtaidistance/dtw_ndim.py index 67f60ba2..3d226c4c 100644 --- a/dtaidistance/dtw_ndim.py +++ b/dtaidistance/dtw_ndim.py @@ -5,8 +5,11 @@ Dynamic Time Warping (DTW) for N-dimensional series. +All the functionality in this subpackage is also available in +the dtw subpackage with argument use_ndim. + :author: Wannes Meert -:copyright: Copyright 2017-2022 KU Leuven, DTAI Research Group. +:copyright: Copyright 2017-2024 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ @@ -16,6 +19,7 @@ import array from . import dtw +from . import ed from .dtw import _check_library, SeriesContainer, _distance_matrix_idxs, distances_array_to_matrix,\ _distance_matrix_length, _complete_block from . import util_numpy @@ -58,7 +62,7 @@ inf = float("inf") -def ub_euclidean(s1, s2): +def ub_euclidean(s1, s2, inner_dist=innerdistance.default): """ Euclidean (dependent) distance between two n-dimensional sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series @@ -68,21 +72,7 @@ def ub_euclidean(s1, s2): :param s2: Sequence of numbers, 1st dimension is sequence, 2nd dimension is n-dimensional value vector. :return: Euclidean distance """ - if np is None: - raise NumpyException("Numpy is required for the ub_euclidean method.") - n = min(len(s1), len(s2)) - ub = 0 - for i in range(n): - ub += np.sum((s1[i] - s2[i]) ** 2) - # If the two series differ in length, compare the last element of the shortest series - # to the remaining elements in the longer series - if len(s1) > len(s2): - for i in range(n, len(s1)): - ub += np.sum((s1[i] - s2[n - 1]) ** 2) - elif len(s1) < len(s2): - for i in range(n, len(s2)): - ub += np.sum((s1[n - 1] - s2[i]) ** 2) - return math.sqrt(ub) + return ed.distance(s1, s2, inner_dist=inner_dist, use_ndim=True) def distance(s1, s2, window=None, max_dist=None, @@ -133,117 +123,10 @@ def distance(s1, s2, window=None, max_dist=None, Generalizing dtw to the multi-dimensional case requires an adaptive approach. Data Mining and Knowledge Discovery, 31:1–31, 2016. """ - if use_c: - if dtw_cc is None: - logger.warning("C-library not available, using the Python version") - else: - return distance_fast(s1, s2, - window=window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist) - if np is None: - raise NumpyException("Numpy is required for the dtw_ndim.distance method " - "(Numpy is not required for the distance_fast method that uses the C library") - r, c = len(s1), len(s2) - if max_length_diff is not None and abs(r - c) > max_length_diff: - return inf - if window is None: - window = max(r, c) - if not max_step: - max_step = inf - else: - max_step *= max_step - if use_pruning or only_ub: - max_dist = ub_euclidean(s1, s2) ** 2 - if only_ub: - return max_dist - elif not max_dist: - max_dist = inf - else: - max_dist *= max_dist - if not penalty: - penalty = 0 - else: - penalty *= penalty - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=False) - if psi is None: - psi = 0 - length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) - # print("length (py) = {}".format(length)) - dtw = array.array('d', [inf] * (2 * length)) - sc = 0 - ec = 0 - ec_next = 0 - smaller_found = False - for i in range(psi + 1): - dtw[i] = 0 - skip = 0 - i0 = 1 - i1 = 0 - psi_shortest = inf - for i in range(r): - # print("i={}".format(i)) - # print(dtw) - skipp = skip - skip = max(0, i - max(0, r - c) - window + 1) - i0 = 1 - i0 - i1 = 1 - i1 - for ii in range(i1 * length, i1 * length + length): - dtw[ii] = inf - j_start = max(0, i - max(0, r - c) - window + 1) - j_end = min(c, i + max(0, c - r) + window) - if sc > j_start: - j_start = sc - smaller_found = False - ec_next = i - if length == c + 1: - skip = 0 - if psi != 0 and j_start == 0 and i < psi: - dtw[i1 * length] = 0 - for j in range(j_start, j_end): - # d = np.sum((s1[i] - s2[j]) ** 2) - d = idist_fn(s1[i], s2[j]) - if d > max_step: - continue - assert j + 1 - skip >= 0 - assert j - skipp >= 0 - assert j + 1 - skipp >= 0 - assert j - skip >= 0 - dtw[i1 * length + j + 1 - skip] = d + min(dtw[i0 * length + j - skipp], - dtw[i0 * length + j + 1 - skipp] + penalty, - dtw[i1 * length + j - skip] + penalty) - # print('({},{}), ({},{}), ({},{})'.format(i0, j - skipp, i0, j + 1 - skipp, i1, j - skip)) - # print('{}, {}, {}'.format(dtw[i0, j - skipp], dtw[i0, j + 1 - skipp], dtw[i1, j - skip])) - # print('i={}, j={}, d={}, skip={}, skipp={}'.format(i,j,d,skip,skipp)) - # print(dtw) - if dtw[i1 * length + j + 1 - skip] > max_dist: - if not smaller_found: - sc = j + 1 - if j >= ec: - break - else: - smaller_found = True - ec_next = j + 1 - ec = ec_next - if psi != 0 and j_end == len(s2) and len(s1) - 1 - i <= psi: - psi_shortest = min(psi_shortest, dtw[i1 * length + length - 1]) - if psi == 0: - d = math.sqrt(dtw[i1 * length + min(c, c + window - 1) - skip]) - else: - ic = min(c, c + window - 1) - skip - vc = dtw[i1 * length + ic - psi:i1 * length + ic + 1] - d = min(array_min(vc), psi_shortest) - d = math.sqrt(d) - if max_dist and d > max_dist: - d = inf - d = result_fn(d) - return d + return dtw.distance(s1, s2, window=window, max_dist=max_dist, + max_step=max_step, max_length_diff=max_length_diff, penalty=penalty, psi=psi, + use_c=use_c, use_pruning=use_pruning, only_ub=only_ub, + inner_dist=inner_dist, use_ndim=True) def distance_fast(s1, s2, window=None, max_dist=None, @@ -253,31 +136,10 @@ def distance_fast(s1, s2, window=None, max_dist=None, Note: the series are expected to be arrays of the type ``double``. Thus ``numpy.array([[1,1],[2,2],[3,3]], dtype=numpy.double)`` """ - _check_library(raise_exception=True) - # Check that Numpy arrays for C contiguous - if np is not None: - if isinstance(s1, (np.ndarray, np.generic)): - if not s1.data.c_contiguous: - logger.debug("Warning: Sequence 1 passed to method distance is not C-contiguous. " + - "The sequence will be copied.") - s1 = s1.copy(order='C') - if isinstance(s2, (np.ndarray, np.generic)): - if not s2.data.c_contiguous: - logger.debug("Warning: Sequence 2 passed to method distance is not C-contiguous. " + - "The sequence will be copied.") - s2 = s2.copy(order='C') - # Move data to C library - d = dtw_cc.distance_ndim(s1, s2, - window=window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist) - return d + return dtw.distance_fast(s1, s2, window=window, max_dist=max_dist, + max_step=max_step, max_length_diff=max_length_diff, penalty=penalty, + psi=psi, use_pruning=use_pruning, only_ub=only_ub, + inner_dist=inner_dist, use_ndim=True) def warping_paths(*args, **kwargs): @@ -298,35 +160,6 @@ def warping_paths_fast(*args, **kwargs): return dtw.warping_paths_fast(*args, use_ndim=True, **kwargs) -def _distance_with_params(t): - return distance(t[0], t[1], **t[2]) - - -def _distance_c_with_params(t): - return dtw_cc.distance_ndim(t[0], t[1], **t[3]) - - -def distance_matrix_python(s, block=None, show_progress=False, max_length_diff=None, dist_opts=None): - if dist_opts is None: - dist_opts = {} - dists = array.array('d', [inf] * _distance_matrix_length(block, len(s))) - block, triu = _complete_block(block, len(s)) - it_r = range(block[0][0], block[0][1]) - if show_progress: - it_r = tqdm(it_r) - idx = 0 - for r in it_r: - if triu: - it_c = range(max(r + 1, block[1][0]), min(len(s), block[1][1])) - else: - it_c = range(block[1][0], min(len(s), block[1][1])) - for c in it_c: - if abs(len(s[r]) - len(s[c])) <= max_length_diff: - dists[idx] = distance(s[r], s[c], **dist_opts) - idx += 1 - return dists - - def distance_matrix(s, ndim=None, max_dist=None, use_pruning=False, max_length_diff=None, window=None, max_step=None, penalty=None, psi=None, block=None, compact=False, parallel=False, @@ -370,85 +203,13 @@ def distance_matrix(s, ndim=None, max_dist=None, use_pruning=False, max_length_d Data Mining and Knowledge Discovery, 31:1–31, 2016. """ # Check whether multiprocessing is available - if use_c: - _check_library(raise_exception=True) - if use_c and parallel: - if dtw_cc_omp is None: - logger.warning('OMP extension not loaded, using multiprocessing') - if parallel and (use_mp or not use_c or dtw_cc_omp is None): - try: - import multiprocessing as mp - logger.info('Using multiprocessing') - except ImportError: - msg = 'Cannot load multiprocessing' - logger.error(msg) - raise Exception(msg) - else: - mp = None - # Prepare options and data to pass to distance method - dist_opts = { - 'max_dist': max_dist, - 'max_step': max_step, - 'window': window, - 'max_length_diff': max_length_diff, - 'penalty': penalty, - 'psi': psi, - 'use_pruning': use_pruning, - 'inner_dist': inner_dist - } s = SeriesContainer.wrap(s) - if ndim is None: - ndim = s.detected_ndim - if max_length_diff is None: - max_length_diff = inf - dists = None - if use_c: - for k, v in dist_opts.items(): - if v is None: - # None is represented as 0.0 for C - dist_opts[k] = 0 - - logger.info('Computing n-dim distances') - if use_c and parallel and not use_mp and dtw_cc_omp is not None: - logger.info("Compute distances in C (parallel=OMP)") - dist_opts['block'] = block - dists = dtw_cc_omp.distance_matrix_ndim(s, ndim, **dist_opts) - - elif use_c and parallel and (dtw_cc_omp is None or use_mp): - logger.info("Compute distances in C (parallel=MP)") - idxs = _distance_matrix_idxs(block, len(s)) - with mp.Pool() as p: - dists = p.map(_distance_c_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) - - elif use_c and not parallel: - logger.info("Compute distances in C (parallel=No)") - dist_opts['block'] = block - dists = dtw_cc.distance_matrix_ndim(s, ndim, **dist_opts) - - elif not use_c and parallel: - logger.info("Compute distances in Python (parallel=MP)") - idxs = _distance_matrix_idxs(block, len(s)) - with mp.Pool() as p: - dists = p.map(_distance_with_params, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) - - elif not use_c and not parallel: - logger.info("Compute distances in Python (parallel=No)") - dists = distance_matrix_python(s, block=block, show_progress=show_progress, - max_length_diff=max_length_diff, dist_opts=dist_opts) - - else: - raise Exception(f'Unsupported combination of: parallel={parallel}, ' - f'use_c={use_c}, dtw_cc_omp={dtw_cc_omp}, use_mp={use_mp}') - - exp_length = _distance_matrix_length(block, len(s)) - assert len(dists) == exp_length, "len(dists)={} != {}".format(len(dists), exp_length) - if compact: - return dists - - # Create full matrix and fill upper triangular matrix with distance values (or only block if specified) - dists_matrix = distances_array_to_matrix(dists, nb_series=len(s), block=block, only_triu=only_triu) - - return dists_matrix + s.set_detected_ndim(ndim) + return dtw.distance_matrix(s, max_dist=max_dist, use_pruning=use_pruning, max_length_diff=max_length_diff, + window=window, max_step=max_step, penalty=penalty, psi=psi, + block=block, compact=compact, parallel=parallel, + use_c=use_c, use_mp=use_mp, show_progress=show_progress, only_triu=only_triu, + inner_dist=inner_dist, use_ndim=True) def distance_matrix_fast(s, ndim=None, max_dist=None, max_length_diff=None, @@ -456,7 +217,6 @@ def distance_matrix_fast(s, ndim=None, max_dist=None, max_length_diff=None, block=None, compact=False, parallel=True, only_triu=False, inner_dist=innerdistance.default): """Fast C version of :meth:`distance_matrix`.""" - _check_library(raise_exception=True, include_omp=parallel) return distance_matrix(s, ndim=ndim, max_dist=max_dist, max_length_diff=max_length_diff, window=window, max_step=max_step, penalty=penalty, psi=psi, block=block, compact=compact, parallel=parallel, @@ -466,6 +226,4 @@ def distance_matrix_fast(s, ndim=None, max_dist=None, max_length_diff=None, def warping_path(from_s, to_s, **kwargs): """Compute warping path between two sequences.""" - dist, paths = warping_paths(from_s, to_s, **kwargs) - path = dtw.best_path(paths) - return path + return dtw.warping_path(from_s, to_s, use_ndim=True, **kwargs) diff --git a/dtaidistance/ed.py b/dtaidistance/ed.py index 9bb9ee89..c819d884 100644 --- a/dtaidistance/ed.py +++ b/dtaidistance/ed.py @@ -45,7 +45,7 @@ def _check_library(raise_exception=True): raise Exception(msg) -def distance(s1, s2, inner_dist=innerdistance.default): +def distance(s1, s2, inner_dist=innerdistance.default, use_ndim=False): """ Euclidean distance between two sequences. Supports different lengths. If the two series differ in length, compare the last element of the shortest series @@ -57,11 +57,11 @@ def distance(s1, s2, inner_dist=innerdistance.default): :param inner_dist: Inner distance function between two values :return: Euclidean distance """ - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist=inner_dist, use_ndim=False) + idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist=inner_dist, use_ndim=use_ndim) n = min(len(s1), len(s2)) ub = 0 for v1, v2 in zip(s1, s2): - ub += (v1 - v2)**2 + ub += idist_fn(v1, v2) # (v1 - v2)**2 # If the two series differ in length, compare the last element of the shortest series # to the remaining elements in the longer series if len(s1) > len(s2): diff --git a/dtaidistance/innerdistance.py b/dtaidistance/innerdistance.py index d666b76a..b8fe91cf 100644 --- a/dtaidistance/innerdistance.py +++ b/dtaidistance/innerdistance.py @@ -45,6 +45,8 @@ def inner_dist(x, y): @staticmethod def result(x): + if np is not None and isinstance(x, np.ndarray): + return np.sqrt(x) return math.sqrt(x) @@ -87,6 +89,9 @@ class CustomInnerDist: def inner_dist(x, y): """The distance between two points in the series. + For n-dimensional data, the two arguments x and y will be vectors. + Otherwise, they are scalars. + For example, for default DTW this would be the Squared Euclidean distance: (a-b)**2. """ @@ -96,6 +101,8 @@ def inner_dist(x, y): def result(x): """The transformation applied to the sum of all inner distances. + The variable x can be both a single number as a matrix. + For example, for default DTW, which uses Squared Euclidean, this would be: sqrt(d). Because d = (a_0-b_0)**2 + (a_1-b_1)**2 ... """ diff --git a/dtaidistance/subsequence/subsequencesearch.py b/dtaidistance/subsequence/subsequencesearch.py index 9195b713..18f348ff 100644 --- a/dtaidistance/subsequence/subsequencesearch.py +++ b/dtaidistance/subsequence/subsequencesearch.py @@ -216,6 +216,7 @@ def align(self, k=None): import heapq h = [(-np.inf, -1)] max_dist = self.max_dist + self.dists_options['max_dist'] = max_dist for idx, series in enumerate(self.s): if self.use_lb: lb = lb_keogh(self.query, series, **self.dists_options) diff --git a/dtaidistance/util.py b/dtaidistance/util.py index dcae7cd5..58b5c553 100644 --- a/dtaidistance/util.py +++ b/dtaidistance/util.py @@ -239,6 +239,11 @@ def __init__(self, series, support_ndim=True): else: self.series = series + def set_detected_ndim(self, ndim): + if ndim is None: + return + self.detected_ndim = ndim + def c_data_compat(self): """Return a datastructure that the C-component knows how to handle. The method tries to avoid copying or reallocating memory. From 9372f4585462c7f490bfeb3cc7e9ab422d3fee42 Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 28 Mar 2024 21:38:31 +0100 Subject: [PATCH 16/33] differencing change to gust method --- tests/test_preprocessing.py | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 tests/test_preprocessing.py diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 00000000..337b595f --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,39 @@ +import logging +import sys, os +import random +from pathlib import Path +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) + +import pytest + +from dtaidistance.preprocessing import differencing +from dtaidistance import util_numpy + +logger = logging.getLogger("be.kuleuven.dtai.distance") +numpyonly = pytest.mark.skipif("util_numpy.test_without_numpy()") +scipyonly = pytest.mark.skipif("util_numpy.test_without_scipy()") + + +@numpyonly +@scipyonly +def test_differencing(): + with util_numpy.test_uses_numpy() as np: + series = np.array([0.1, 0.3, 0.2, 0.1] * 3) + series = differencing(series, smooth=0.1) + np.testing.assert_array_almost_equal( + series, np.array([0.02217, 0.010307, 0.002632, 0.001504, 0.001629, -0.000457, + -0.001698, -0.001238, -0.004681, -0.014869, -0.026607])) + + series = np.array([[0.1, 0.3, 0.2, 0.1] * 3]) + series = differencing(series, smooth=0.1) + np.testing.assert_array_almost_equal( + series, np.array([[0.02217, 0.010307, 0.002632, 0.001504, 0.001629, -0.000457, + -0.001698, -0.001238, -0.004681, -0.014869, -0.026607]])) + + +if __name__ == "__main__": + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + directory = Path(os.environ.get('TESTDIR', Path(__file__).parent)) + print(f"Saving files to {directory}") + test_differencing() From 40384c957a0c52eb9ed57763ce2b6e430ecca11d Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 28 Mar 2024 23:17:51 +0100 Subject: [PATCH 17/33] Use DTWSettings for arguments --- docs/modules/dtw.rst | 3 + dtaidistance/dtw.py | 360 ++++++++++++++-------------------- dtaidistance/innerdistance.py | 32 ++- 3 files changed, 184 insertions(+), 211 deletions(-) diff --git a/docs/modules/dtw.rst b/docs/modules/dtw.rst index 18e0dd88..eebad7cc 100644 --- a/docs/modules/dtw.rst +++ b/docs/modules/dtw.rst @@ -1,3 +1,6 @@ .. automodule:: dtaidistance.dtw :members: + +.. autoclass:: dtaidistance.dtw.DTWSettings + :members: diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 04f67445..581ab189 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -109,7 +109,34 @@ def _check_library(include_omp=False, raise_exception=True): class DTWSettings: def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, - max_length_diff=None, penalty=None, psi=None, inner_dist=innerdistance.default): + max_length_diff=None, penalty=None, psi=None, inner_dist=innerdistance.default, + use_ndim=False, use_c=False): + """Settings for Dynamic Time Warping distance methods. + + :param window: Only allow for maximal shifts from the two diagonals smaller than this number. + It includes the diagonal, meaning that an Euclidean distance is obtained by setting + ``window=1.`` + :param max_dist: Stop if the returned values will be larger than this value + :param max_step: Do not allow steps larger than this value + :param max_length_diff: Return infinity if length of two series is larger + :param penalty: Penalty to add if compression or expansion is applied + :param psi: Psi relaxation parameter (ignore start and end of matching). + If psi is a single integer, it is used for both start and end relaxations of both series. + If psi is a 4-tuple, it is used as the psi-relaxation for + (begin series1, end series1, begin series2, end series2). + Useful for cyclical series. + :param use_pruning: Prune values based on Euclidean distance. + This is the same as passing ub_euclidean() to max_dist + :param inner_dist: Distance between two points in the time series. + One of 'squared euclidean' (default), 'euclidean'. + When using the pure Python implementation (thus use_c=False) then the argument can also + be an object that has as callable arguments 'inner_dist' and 'result'. The 'inner_dist' + function computes the distance between two points (e.g., squared euclidean) and 'result' + is the function to apply to the final distance (e.g., sqrt when using squared euclidean). + You can also inherit from the 'innerdistance.CustomInnerDist' class. + :param use_ndim: Use n-dimensional (aka multivariate) series instead of 1-dimensional series. + :param use_c: Use the C variant if available. + """ self.window = window self.use_pruning = use_pruning self.max_dist = max_dist @@ -118,16 +145,52 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, self.penalty = penalty self.psi = psi self.inner_dist = inner_dist + self.use_ndim = use_ndim + self.use_c = use_c + + _, _, inner_val = innerdistance.inner_dist_fns(self.inner_dist) + + if not self.max_step: + self.adj_max_step = inf + else: + self.adj_max_step = inner_val(self.max_step) + + if not self.max_dist: + self.adj_max_dist = inf + else: + self.adj_max_dist = inner_val(self.max_dist) + + if not self.penalty: + self.adj_penalty = 0 + else: + self.adj_penalty = inner_val(self.penalty) @staticmethod def for_dtw(s1, s2, **kwargs): settings = DTWSettings(**kwargs) + if settings.window is None: + settings.window = max(len(s1), len(s2)) settings.set_max_dist(s1, s2) return settings def set_max_dist(self, s1, s2): + _, _, ival_fn = innerdistance.inner_dist_fns(self.inner_dist, use_ndim=self.use_ndim) if self.use_pruning: - self.max_dist = ub_euclidean(s1, s2)**2 + self.adj_max_dist = ival_fn(ub_euclidean(s1, s2, inner_dist=self.inner_dist)) + + def kwargs(self): + return { + 'window': self.window, + 'use_pruning': self.use_pruning, + 'max_dist': self.max_dist, + 'max_step': self.max_step, + 'max_length_diff': self.max_length_diff, + 'penalty': self.penalty, + 'psi': self.psi, + 'inner_dist': self.inner_dist, + 'use_ndim': self.use_ndim, + 'use_c': self.use_c + } def c_kwargs(self): window = 0 if self.window is None else self.window @@ -136,6 +199,7 @@ def c_kwargs(self): max_length_diff = 0 if self.max_length_diff is None else self.max_length_diff penalty = 0 if self.penalty is None else self.penalty psi = 0 if self.psi is None else self.psi + use_pruning = 0 if self.use_pruning is None else self.use_pruning inner_dist = innerdistance.to_c(self.inner_dist) return { 'window': window, @@ -144,6 +208,7 @@ def c_kwargs(self): 'max_length_diff': max_length_diff, 'penalty': penalty, 'psi': psi, + 'use_pruning': use_pruning, 'inner_dist': inner_dist } @@ -155,18 +220,19 @@ def __str__(self): return r -def lb_keogh(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, use_c=False, use_ndim=False, inner_dist=innerdistance.default): +def lb_keogh(s1, s2, **kwargs): """Lowerbound LB_KEOGH""" - if use_c: - return dtw_cc.lb_keogh(s1, s2, window=window, max_dist=max_dist, max_step=max_step, inner_dist=inner_dist) - if window is None: - window = max(len(s1), len(s2)) - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) + s = DTWSettings(**kwargs) + if s.use_c: + return dtw_cc.lb_keogh(s1, s2, window=s.window, max_dist=s.max_dist, + max_step=s.max_step, inner_dist=s.inner_dist) + if s.window is None: + s.window = max(len(s1), len(s2)) + idist_fn, result_fn, _ = innerdistance.inner_dist_fns(s.inner_dist, use_ndim=s.use_ndim) t = 0 - imin_diff = max(0, len(s1) - len(s2)) + window - 1 - imax_diff = max(0, len(s2) - len(s1)) + window + imin_diff = max(0, len(s1) - len(s2)) + s.window - 1 + imax_diff = max(0, len(s2) - len(s1)) + s.window for i in range(len(s1)): imin = max(0, i - imin_diff) imax = min(len(s2), i + imax_diff) @@ -187,11 +253,7 @@ def ub_euclidean(s1, s2, inner_dist=innerdistance.default): return ed.distance(s1, s2, inner_dist=inner_dist) -def distance(s1, s2, - window=None, max_dist=None, max_step=None, - max_length_diff=None, penalty=None, psi=None, - use_c=False, use_pruning=False, only_ub=False, - inner_dist=innerdistance.default, use_ndim=False): +def distance(s1, s2, only_ub=False, **kwargs): """ Dynamic Time Warping. @@ -207,70 +269,26 @@ def distance(s1, s2, :param s1: First sequence :param s2: Second sequence - :param window: Only allow for maximal shifts from the two diagonals smaller than this number. - It includes the diagonal, meaning that an Euclidean distance is obtained by setting - ``window=1.`` - :param max_dist: Stop if the returned values will be larger than this value - :param max_step: Do not allow steps larger than this value - :param max_length_diff: Return infinity if length of two series is larger - :param penalty: Penalty to add if compression or expansion is applied - :param psi: Psi relaxation parameter (ignore start and end of matching). - If psi is a single integer, it is used for both start and end relaxations of both series. - If psi is a 4-tuple, it is used as the psi-relaxation for - (begin series1, end series1, begin series2, end series2). - Useful for cyclical series. - :param use_c: Use fast pure c compiled functions - :param use_pruning: Prune values based on Euclidean distance. - This is the same as passing ub_euclidean() to max_dist :param only_ub: Only compute the upper bound (Euclidean). - :param inner_dist: Distance between two points in the time series. - One of 'squared euclidean' (default), 'euclidean'. - When using the pure Python implementation (thus use_c=False) then the argument can also - be an object that has as callable arguments 'inner_dist' and 'result'. The 'inner_dist' - function computes the distance between two points (e.g., squared euclidean) and 'result' - is the function to apply to the final distance (e.g., sqrt when using squared euclidean). - You can also inherit from the 'innerdistance.CustomInnerDist' class. + :param kwargs: :class:`DTWSettings` arguments Returns: DTW distance """ - if use_c: + s = DTWSettings.for_dtw(s1, s2, **kwargs) + if s.use_c: if dtw_cc is None: logger.warning("C-library not available, using the Python version") else: - return distance_fast(s1, s2, window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist, - use_ndim=use_ndim) + return distance_fast(s1, s2, **s.kwargs()) + idist_fn, result_fn, ival_fn = innerdistance.inner_dist_fns(s.inner_dist, use_ndim=s.use_ndim) r, c = len(s1), len(s2) - if max_length_diff is not None and abs(r - c) > max_length_diff: + if s.max_length_diff is not None and abs(r - c) > s.max_length_diff: return inf - if window is None: - window = max(r, c) - if not max_step: - max_step = inf - else: - max_step *= max_step - if use_pruning or only_ub: - max_dist = ub_euclidean(s1, s2)**2 - if only_ub: - return max_dist - elif not max_dist: - max_dist = inf - else: - max_dist *= max_dist - if not penalty: - penalty = 0 - else: - penalty *= penalty - idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) - psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(psi) - length = min(c + 1, abs(r - c) + 2 * (window - 1) + 1 + 1 + 1) + if only_ub: + return ival_fn(ub_euclidean(s1, s2, inner_dist=s.inner_dist)) + + psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(s.psi) + length = min(c + 1, abs(r - c) + 2 * (s.window - 1) + 1 + 1 + 1) # print("length (py) = {}".format(length)) dtw = array.array('d', [inf] * (2 * length)) sc = 0 @@ -287,13 +305,13 @@ def distance(s1, s2, # print("i={}".format(i)) # print(dtw) skipp = skip - skip = max(0, i - max(0, r - c) - window + 1) + skip = max(0, i - max(0, r - c) - s.window + 1) i0 = 1 - i0 i1 = 1 - i1 for ii in range(i1*length, i1*length+length): dtw[ii] = inf - j_start = max(0, i - max(0, r - c) - window + 1) - j_end = min(c, i + max(0, c - r) + window) + j_start = max(0, i - max(0, r - c) - s.window + 1) + j_end = min(c, i + max(0, c - r) + s.window) if sc > j_start: j_start = sc smaller_found = False @@ -305,20 +323,20 @@ def distance(s1, s2, for j in range(j_start, j_end): # d = (s1[i] - s2[j])**2 d = idist_fn(s1[i], s2[j]) - if d > max_step: + if d > s.adj_max_step: continue assert j + 1 - skip >= 0 assert j - skipp >= 0 assert j + 1 - skipp >= 0 assert j - skip >= 0 dtw[i1 * length + j + 1 - skip] = d + min(dtw[i0 * length + j - skipp], - dtw[i0 * length + j + 1 - skipp] + penalty, - dtw[i1 * length + j - skip] + penalty) + dtw[i0 * length + j + 1 - skipp] + s.adj_penalty, + dtw[i1 * length + j - skip] + s.adj_penalty) # print('({},{}), ({},{}), ({},{})'.format(i0, j - skipp, i0, j + 1 - skipp, i1, j - skip)) # print('{}, {}, {}'.format(dtw[i0, j - skipp], dtw[i0, j + 1 - skipp], dtw[i1, j - skip])) # print('i={}, j={}, d={}, skip={}, skipp={}'.format(i,j,d,skip,skipp)) # print(dtw) - if dtw[i1 * length + j + 1 - skip] > max_dist: + if dtw[i1 * length + j + 1 - skip] > s.adj_max_dist: if not smaller_found: sc = j + 1 if j >= ec: @@ -330,23 +348,21 @@ def distance(s1, s2, if psi_1e != 0 and j_end == len(s2) and len(s1) - 1 - i <= psi_1e: psi_shortest = min(psi_shortest, dtw[i1 * length + j_end - skip]) if psi_1e == 0 and psi_2e == 0: - d = dtw[i1 * length + min(c, c + window - 1) - skip] + d = dtw[i1 * length + min(c, c + s.window - 1) - skip] else: - ic = min(c, c + window - 1) - skip + ic = min(c, c + s.window - 1) - skip if psi_2e != 0: vc = dtw[i1 * length + ic - psi_2e:i1 * length + ic + 1] d = min(array_min(vc), psi_shortest) else: - d = min(dtw[i1 * length + min(c, c + window - 1) - skip], psi_shortest) - if max_dist and d > max_dist: + d = min(dtw[i1 * length + min(c, c + s.window - 1) - skip], psi_shortest) + if s.adj_max_dist and d > s.adj_max_dist: d = inf d = result_fn(d) return d -def distance_fast(s1, s2, window=None, max_dist=None, - max_step=None, max_length_diff=None, penalty=None, psi=None, use_pruning=False, only_ub=False, - inner_dist=innerdistance.default, use_ndim=False): +def distance_fast(s1, s2, only_ub=False, **kwargs): """Same as :meth:`distance` but with different defaults to chose the fast C-based version of the implementation (use_c = True). @@ -358,29 +374,12 @@ def distance_fast(s1, s2, window=None, max_dist=None, # Check that Numpy arrays for C contiguous s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) + s = DTWSettings(**kwargs) # Move data to C library - if use_ndim is False: - d = dtw_cc.distance(s1, s2, - window=window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist) + if s.use_ndim is False: + d = dtw_cc.distance(s1, s2, only_ub=only_ub, **s.c_kwargs()) else: - d = dtw_cc.distance_ndim(s1, s2, - window=window, - max_dist=max_dist, - max_step=max_step, - max_length_diff=max_length_diff, - penalty=penalty, - psi=psi, - use_pruning=use_pruning, - only_ub=only_ub, - inner_dist=inner_dist) + d = dtw_cc.distance_ndim(s1, s2, only_ub=only_ub, **s.c_kwargs()) return d @@ -415,9 +414,7 @@ def _process_psi_arg(psi): return psi_1b, psi_1e, psi_2b, psi_2e -def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, - max_step=None, max_length_diff=None, penalty=None, psi=None, psi_neg=True, - use_c=False, use_ndim=False, inner_dist=innerdistance.default): +def warping_paths(s1, s2, psi_neg=True, **kwargs): """ Dynamic Time Warping. @@ -425,51 +422,20 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, :param s1: First sequence :param s2: Second sequence - :param window: see :meth:`distance` - :param max_dist: see :meth:`distance` - :param use_pruning: Prune values based on Euclidean distance. - This is the same as passing ub_euclidean() to max_dist - :param max_step: see :meth:`distance` - :param max_length_diff: see :meth:`distance` - :param penalty: see :meth:`distance` - :param psi: see :meth:`distance` :param psi_neg: Replace values that should be skipped because of psi-relaxation with -1. - :param use_c: Use the C implementation instead of Python - :param use_ndim: The input series is >1 dimensions. - Use cost = SquaredEuclideanDistance(s1[i], s2[j]) - :param inner_dist: Distance between two points in the time series. - One of 'squared euclidean' (default), 'euclidean' + :param kwargs: see :class:`DTWSettings` :returns: (DTW distance, DTW matrix) """ - if use_c: - return warping_paths_fast(s1, s2, window=window, max_dist=max_dist, use_pruning=use_pruning, - max_step=max_step, max_length_diff=max_length_diff, - penalty=penalty, psi=psi, psi_neg=psi_neg, compact=False, - use_ndim=use_ndim, inner_dist=inner_dist) + s = DTWSettings.for_dtw(s1, s2, **kwargs) + if s.use_c: + return warping_paths_fast(s1, s2, psi_neg=psi_neg, **s.kwargs()) if np is None: raise NumpyException("Numpy is required for the warping_paths method") - # Always use ndim to use np functions - cost, result_fn = innerdistance.inner_dist_fns(inner_dist, use_ndim=use_ndim) + cost, result_fn, ival_fn = innerdistance.inner_dist_fns(s.inner_dist, use_ndim=s.use_ndim) r, c = len(s1), len(s2) - if max_length_diff is not None and abs(r - c) > max_length_diff: + if s.max_length_diff is not None and abs(r - c) > s.max_length_diff: return inf - if window is None: - window = max(r, c) - if not max_step: - max_step = inf - else: - max_step *= max_step - if use_pruning: - max_dist = ub_euclidean(s1, s2)**2 - elif not max_dist: - max_dist = inf - else: - max_dist *= max_dist - if penalty is None: - penalty = 0 - else: - penalty *= penalty - psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(psi) + psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(s.psi) dtw = np.full((r + 1, c + 1), inf) # dtw[0, 0] = 0 for i in range(psi_2b + 1): @@ -485,8 +451,8 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, for i in range(r): i0 = i i1 = i + 1 - j_start = max(0, i - max(0, r - c) - window + 1) - j_end = min(c, i + max(0, c - r) + window) + j_start = max(0, i - max(0, r - c) - s.window + 1) + j_end = min(c, i + max(0, c - r) + s.window) if sc > j_start: j_start = sc smaller_found = False @@ -502,14 +468,14 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, for j in range(j_start, j_end): # print('j =', j, 'max=',min(c, c - r + i + window)) d = cost(s1[i], s2[j]) - if max_step is not None and d > max_step: + if s.adj_max_step is not None and d > s.adj_max_step: continue # print(i, j + 1 - skip, j - skipp, j + 1 - skipp, j - skip) dtw[i1, j + 1] = d + min(dtw[i0, j], - dtw[i0, j + 1] + penalty, - dtw[i1, j] + penalty) + dtw[i0, j + 1] + s.adj_penalty, + dtw[i1, j] + s.adj_penalty) # dtw[i + 1, j + 1 - skip] = d + min(dtw[i + 1, j + 1 - skip], dtw[i + 1, j - skip]) - if dtw[i1, j + 1] > max_dist: + if dtw[i1, j + 1] > s.adj_max_dist: if not smaller_found: sc = j + 1 if j >= ec: @@ -521,10 +487,10 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, # Decide which d to return dtw = result_fn(dtw) if psi_1e == 0 and psi_2e == 0: - d = dtw[i1, min(c, c + window - 1)] + d = dtw[i1, min(c, c + s.window - 1)] else: ir = i1 - ic = min(c, c + window - 1) + ic = min(c, c + s.window - 1) if psi_1e != 0: vr = dtw[ir:max(0, ir-psi_1e-1):-1, ic] mir = argmin(vr) @@ -547,14 +513,12 @@ def warping_paths(s1, s2, window=None, max_dist=None, use_pruning=False, if psi_neg: dtw[ir, ic:ic-mic:-1] = -1 d = vc_mic - if max_dist and d*d > max_dist: + if s.adj_max_dist and d*d > s.adj_max_dist: d = inf return d, dtw -def warping_paths_fast(s1, s2, window=None, max_dist=None, use_pruning=False, - max_step=None, max_length_diff=None, penalty=None, psi=None, psi_neg=True, compact=False, - use_ndim=False, inner_dist=innerdistance.default): +def warping_paths_fast(s1, s2, psi_neg=True, compact=False, **kwargs): """Fast C version of :meth:`warping_paths`. Additional parameters: @@ -566,20 +530,20 @@ def warping_paths_fast(s1, s2, window=None, max_dist=None, use_pruning=False, s2 = util_numpy.verify_np_array(s2) r = len(s1) c = len(s2) + s = DTWSettings(**kwargs) _check_library(raise_exception=True) - settings = DTWSettings.for_dtw(s1, s2, window=window, max_dist=max_dist, use_pruning=use_pruning, max_step=max_step, - max_length_diff=max_length_diff, penalty=penalty, psi=psi, inner_dist=inner_dist) + settings = DTWSettings.for_dtw(s1, s2, **kwargs) if compact: wps_width = dtw_cc.wps_width(r, c, **settings.c_kwargs()) wps_compact = np.full((len(s1)+1, wps_width), inf) - if use_ndim: + if settings.use_ndim: d = dtw_cc.warping_paths_compact_ndim(wps_compact, s1, s2, psi_neg, **settings.c_kwargs()) else: d = dtw_cc.warping_paths_compact(wps_compact, s1, s2, psi_neg, **settings.c_kwargs()) return d, wps_compact dtw = np.full((r + 1, c + 1), inf) - if use_ndim: + if settings.use_ndim: d = dtw_cc.warping_paths_ndim(dtw, s1, s2, psi_neg, **settings.c_kwargs()) else: d = dtw_cc.warping_paths(dtw, s1, s2, psi_neg, **settings.c_kwargs()) @@ -720,39 +684,29 @@ def distance_matrix_wrapper(seqs, **kwargs): return distance_matrix_wrapper -def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, - window=None, max_step=None, penalty=None, psi=None, - block=None, compact=False, parallel=False, - use_c=False, use_mp=False, show_progress=False, only_triu=False, - inner_dist=innerdistance.default, use_ndim=False): +def distance_matrix(s, block=None, compact=False, parallel=False, + use_mp=False, show_progress=False, only_triu=False, **kwargs): """Distance matrix for all sequences in s. :param s: Iterable of series - :param max_dist: see :meth:`distance` - :param use_pruning: Prune values based on Euclidean distance. - This is the same as passing ub_euclidean() to max_dist - :param max_length_diff: see :meth:`distance` - :param window: see :meth:`distance` - :param max_step: see :meth:`distance` - :param penalty: see :meth:`distance` - :param psi: see :meth:`distance` :param block: Only compute block in matrix. Expects tuple with begin and end, e.g. ((0,10),(20,25)) will only compare rows 0:10 with rows 20:25. :param compact: Return the distance matrix as an array representing the upper triangular matrix. :param parallel: Use parallel operations - :param use_c: Use c compiled Python functions :param use_mp: Force use Multiprocessing for parallel operations (not OpenMP) :param show_progress: Show progress using the tqdm library. This is only supported for the pure Python version (thus not the C-based implementations). :param only_triu: Only compute upper traingular matrix of warping paths. This is useful if s1 and s2 are the same series and the matrix would be mirrored around the diagonal. + :param kwargs: Arguments for :class:`DTWSettings` :returns: The distance matrix or the condensed distance matrix if the compact argument is true """ + settings = DTWSettings(**kwargs) # Check whether multiprocessing is available - if use_c: + if settings.use_c: requires_omp = parallel and not use_mp _check_library(raise_exception=True, include_omp=requires_omp) - if parallel and (use_mp or not use_c): + if parallel and (use_mp or not settings.use_c): try: import multiprocessing as mp logger.info('Using multiprocessing') @@ -768,75 +722,65 @@ def distance_matrix(s, max_dist=None, use_pruning=False, max_length_diff=None, if (block[0][1] - block[0][0]) < 1 or (block[1][1] - block[1][0]) < 1: return [] # Prepare options and data to pass to distance method - dist_opts = { - 'max_dist': max_dist, - 'max_step': max_step, - 'window': window, - 'max_length_diff': max_length_diff, - 'penalty': penalty, - 'psi': psi, - 'use_pruning': use_pruning, - 'inner_dist': inner_dist - } + dist_opts = settings.kwargs() s = SeriesContainer.wrap(s) - if use_ndim: + if settings.use_ndim: ndim = s.detected_ndim else: ndim = 1 - if max_length_diff is None: + if settings.max_length_diff is None: max_length_diff = inf dists = None - if use_c: + if settings.use_c: for k, v in dist_opts.items(): if v is None: # None is represented as 0.0 for C dist_opts[k] = 0 logger.info('Computing distances') - if use_c and parallel and not use_mp and dtw_cc_omp is not None: + if settings.use_c and parallel and not use_mp and dtw_cc_omp is not None: logger.info("Compute distances in C (parallel=OMP)") dist_opts['block'] = block - if use_ndim: + if settings.use_ndim: dists = dtw_cc_omp.distance_matrix_ndim(s, ndim, **dist_opts) else: dists = dtw_cc_omp.distance_matrix(s, **dist_opts) - elif use_c and parallel and (dtw_cc_omp is None or use_mp): + elif settings.use_c and parallel and (dtw_cc_omp is None or use_mp): logger.info("Compute distances in C (parallel=MP)") idxs = _distance_matrix_idxs(block, len(s)) - if use_ndim: + if settings.use_ndim: fn = _distance_c_with_params_ndim else: fn = _distance_c_with_params with mp.Pool() as p: dists = p.map(fn, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) - elif use_c and not parallel: + elif settings.use_c and not parallel: logger.info("Compute distances in C (parallel=No)") dist_opts['block'] = block - if use_ndim: + if settings.use_ndim: dists = dtw_cc.distance_matrix_ndim(s, ndim, **dist_opts) else: dists = dtw_cc.distance_matrix(s, **dist_opts) - elif not use_c and parallel: + elif not settings.use_c and parallel: logger.info("Compute distances in Python (parallel=MP)") idxs = _distance_matrix_idxs(block, len(s)) - if use_ndim: + if settings.use_ndim: fn = _distance_with_params_ndim else: fn = _distance_with_params with mp.Pool() as p: dists = p.map(fn, [(s[r], s[c], dist_opts) for c, r in zip(*idxs)]) - elif not use_c and not parallel: + elif not settings.use_c and not parallel: logger.info("Compute distances in Python (parallel=No)") - dists = distance_matrix_python(s, block=block, show_progress=show_progress, - dist_opts=dist_opts, use_ndim=use_ndim) + dists = distance_matrix_python(s, block=block, show_progress=show_progress, settings=settings) else: raise Exception(f'Unsupported combination of: parallel={parallel}, ' - f'use_c={use_c}, dtw_cc_omp={dtw_cc_omp}, use_mp={use_mp}') + f'use_c={s.use_c}, dtw_cc_omp={dtw_cc_omp}, use_mp={use_mp}') exp_length = _distance_matrix_length(block, len(s)) assert len(dists) == exp_length, "len(dists)={} != {}".format(len(dists), exp_length) @@ -883,9 +827,9 @@ def distance_array_index(a, b, nb_series): return idx -def distance_matrix_python(s, block=None, show_progress=False, dist_opts=None, use_ndim=False): - if dist_opts is None: - dist_opts = {} +def distance_matrix_python(s, block=None, show_progress=False, settings=None): + if settings is None: + settings = DTWSettings() dists = array.array('d', [inf] * _distance_matrix_length(block, len(s))) block, triu = _complete_block(block, len(s)) it_r = range(block[0][0], block[0][1]) @@ -898,7 +842,7 @@ def distance_matrix_python(s, block=None, show_progress=False, dist_opts=None, u else: it_c = range(block[1][0], min(len(s), block[1][1])) for c in it_c: - dists[idx] = distance(s[r], s[c], use_ndim=use_ndim, **dist_opts) + dists[idx] = distance(s[r], s[c], **settings.kwargs()) idx += 1 return dists diff --git a/dtaidistance/innerdistance.py b/dtaidistance/innerdistance.py index b8fe91cf..526a8b9c 100644 --- a/dtaidistance/innerdistance.py +++ b/dtaidistance/innerdistance.py @@ -49,6 +49,10 @@ def result(x): return np.sqrt(x) return math.sqrt(x) + @staticmethod + def inner_val(x): + return x*x + class SquaredEuclideanNdim: @@ -60,6 +64,10 @@ def inner_dist(x, y): def result(x): return np.sqrt(x) + @staticmethod + def inner_val(x): + return x * x + class Euclidean: @@ -71,6 +79,10 @@ def inner_dist(x, y): def result(x): return x + @staticmethod + def inner_val(x): + return x + class EuclideanNdim: @@ -82,6 +94,10 @@ def inner_dist(x, y): def result(x): return x + @staticmethod + def inner_val(x): + return x + class CustomInnerDist: @@ -108,8 +124,13 @@ def result(x): """ raise Exception("Function not defined") + @staticmethod + def inner_val(x): + """The transformation applied to input settings like max_step.""" + raise Exception("Function not defined") -def inner_dist_fns(inner_dist="squared euclidean", use_ndim=False): + +def inner_dist_cls(inner_dist="squared euclidean", use_ndim=False): use_cls = None if inner_dist == "squared euclidean": if use_ndim: @@ -124,8 +145,13 @@ def inner_dist_fns(inner_dist="squared euclidean", use_ndim=False): elif hasattr(inner_dist, 'inner_dist') and hasattr(inner_dist, 'result'): use_cls = inner_dist else: - raise AttributeError("Unknown value for argument inner_dist") - return use_cls.inner_dist, use_cls.result + raise AttributeError(f"Unknown value for argument inner_dist: {inner_dist}") + return use_cls + + +def inner_dist_fns(inner_dist="squared euclidean", use_ndim=False): + use_cls = inner_dist_cls(inner_dist, use_ndim) + return use_cls.inner_dist, use_cls.result, use_cls.inner_val def to_c(inner_dist): From 57f45cca101312788610399879979f89a3de789d Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 28 Mar 2024 23:49:33 +0100 Subject: [PATCH 18/33] docs --- dtaidistance/dtw.py | 58 ++++++++++++++++++-------------------- dtaidistance/dtw_ndim.py | 39 ++----------------------- dtaidistance/exceptions.py | 5 ++++ 3 files changed, 34 insertions(+), 68 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 581ab189..f275514d 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -11,25 +11,19 @@ """ import logging -import math import array +import math from . import ed from . import util from . import util_numpy from . import innerdistance from .util import SeriesContainer -from .exceptions import NumpyException +from .exceptions import NumpyException, CythonException logger = logging.getLogger("be.kuleuven.dtai.distance") -dtw_ndim = None -try: - from . import dtw_ndim -except ImportError: - logger.debug('DTAIDistance ndim library not available') - dtw_cc = None try: from . import dtw_cc @@ -92,7 +86,7 @@ def _check_library(include_omp=False, raise_exception=True): "See the documentation for alternative installation options." logger.error(msg) if raise_exception: - raise Exception(msg) + raise CythonException(msg) if include_omp and (dtw_cc_omp is None or not dtw_cc_omp.is_openmp_supported()): msg = "The compiled dtaidistance C-OMP library " if dtw_cc_omp and not dtw_cc_omp.is_openmp_supported(): @@ -104,7 +98,7 @@ def _check_library(include_omp=False, raise_exception=True): "See the documentation for alternative installation options." logger.error(msg) if raise_exception: - raise Exception(msg) + raise CythonException(msg) class DTWSettings: @@ -130,7 +124,7 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, :param inner_dist: Distance between two points in the time series. One of 'squared euclidean' (default), 'euclidean'. When using the pure Python implementation (thus use_c=False) then the argument can also - be an object that has as callable arguments 'inner_dist' and 'result'. The 'inner_dist' + be an object that has as callable arguments 'inner_dist', 'result', and 'inner_val'. The 'inner_dist' function computes the distance between two points (e.g., squared euclidean) and 'result' is the function to apply to the final distance (e.g., sqrt when using squared euclidean). You can also inherit from the 'innerdistance.CustomInnerDist' class. @@ -165,6 +159,11 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, else: self.adj_penalty = inner_val(self.penalty) + if self.max_length_diff is None: + self.adj_max_length_diff = inf + else: + self.adj_max_length_diff = self.max_length_diff + @staticmethod def for_dtw(s1, s2, **kwargs): settings = DTWSettings(**kwargs) @@ -196,7 +195,8 @@ def c_kwargs(self): window = 0 if self.window is None else self.window max_dist = 0 if self.max_dist is None else self.max_dist max_step = 0 if self.max_step is None else self.max_step - max_length_diff = 0 if self.max_length_diff is None else self.max_length_diff + max_length_diff = 0 if (self.max_length_diff is None or math.isinf(self.max_length_diff)) \ + else self.max_length_diff penalty = 0 if self.penalty is None else self.penalty psi = 0 if self.psi is None else self.psi use_pruning = 0 if self.use_pruning is None else self.use_pruning @@ -282,19 +282,16 @@ def distance(s1, s2, only_ub=False, **kwargs): return distance_fast(s1, s2, **s.kwargs()) idist_fn, result_fn, ival_fn = innerdistance.inner_dist_fns(s.inner_dist, use_ndim=s.use_ndim) r, c = len(s1), len(s2) - if s.max_length_diff is not None and abs(r - c) > s.max_length_diff: + if s.adj_max_length_diff is not None and abs(r - c) > s.adj_max_length_diff: return inf if only_ub: return ival_fn(ub_euclidean(s1, s2, inner_dist=s.inner_dist)) psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(s.psi) length = min(c + 1, abs(r - c) + 2 * (s.window - 1) + 1 + 1 + 1) - # print("length (py) = {}".format(length)) dtw = array.array('d', [inf] * (2 * length)) sc = 0 ec = 0 - ec_next = 0 - smaller_found = False for i in range(psi_2b + 1): dtw[i] = 0 skip = 0 @@ -363,7 +360,7 @@ def distance(s1, s2, only_ub=False, **kwargs): def distance_fast(s1, s2, only_ub=False, **kwargs): - """Same as :meth:`distance` but with different defaults to chose the fast C-based version of + """Same as :meth:`distance` but with different defaults to choose the fast C-based version of the implementation (use_c = True). Note: the series are expected to be arrays of the type ``double``. @@ -423,7 +420,7 @@ def warping_paths(s1, s2, psi_neg=True, **kwargs): :param s1: First sequence :param s2: Second sequence :param psi_neg: Replace values that should be skipped because of psi-relaxation with -1. - :param kwargs: see :class:`DTWSettings` + :param kwargs: See arguments for :class:`DTWSettings` :returns: (DTW distance, DTW matrix) """ s = DTWSettings.for_dtw(s1, s2, **kwargs) @@ -433,7 +430,7 @@ def warping_paths(s1, s2, psi_neg=True, **kwargs): raise NumpyException("Numpy is required for the warping_paths method") cost, result_fn, ival_fn = innerdistance.inner_dist_fns(s.inner_dist, use_ndim=s.use_ndim) r, c = len(s1), len(s2) - if s.max_length_diff is not None and abs(r - c) > s.max_length_diff: + if s.adj_max_length_diff is not None and abs(r - c) > s.adj_max_length_diff: return inf psi_1b, psi_1e, psi_2b, psi_2e = _process_psi_arg(s.psi) dtw = np.full((r + 1, c + 1), inf) @@ -442,12 +439,9 @@ def warping_paths(s1, s2, psi_neg=True, **kwargs): dtw[0, i] = 0 for i in range(psi_1b + 1): dtw[i, 0] = 0 - i0 = 1 i1 = 0 sc = 0 ec = 0 - smaller_found = False - ec_next = 0 for i in range(r): i0 = i i1 = i + 1 @@ -521,16 +515,18 @@ def warping_paths(s1, s2, psi_neg=True, **kwargs): def warping_paths_fast(s1, s2, psi_neg=True, compact=False, **kwargs): """Fast C version of :meth:`warping_paths`. - Additional parameters: - :param compact: Return a compact warping paths matrix. + :param s1: See :meth:`warping_paths` + :param s2: See :meth:`warping_paths` + :param psi_neg: See :meth:`warping_paths` + :param compact: Return a compact warping paths matrix. Size is ((l1 + 1), min(l2 + 1, abs(l1 - l2) + 2*window + 1)). This option is meant for internal use. For more details, see the C code. + :param kwargs: See :meth:`warping_paths` """ s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) r = len(s1) c = len(s2) - s = DTWSettings(**kwargs) _check_library(raise_exception=True) settings = DTWSettings.for_dtw(s1, s2, **kwargs) if compact: @@ -698,7 +694,7 @@ def distance_matrix(s, block=None, compact=False, parallel=False, the pure Python version (thus not the C-based implementations). :param only_triu: Only compute upper traingular matrix of warping paths. This is useful if s1 and s2 are the same series and the matrix would be mirrored around the diagonal. - :param kwargs: Arguments for :class:`DTWSettings` + :param kwargs: See arguments for :class:`DTWSettings` :returns: The distance matrix or the condensed distance matrix if the compact argument is true """ settings = DTWSettings(**kwargs) @@ -728,9 +724,6 @@ def distance_matrix(s, block=None, compact=False, parallel=False, ndim = s.detected_ndim else: ndim = 1 - if settings.max_length_diff is None: - max_length_diff = inf - dists = None if settings.use_c: for k, v in dist_opts.items(): if v is None: @@ -925,7 +918,7 @@ def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=No if not use_mp and parallel: try: _check_library(raise_exception=True, include_omp=True) - except Exception: + except CythonException: use_mp = True return distance_matrix(s, max_dist=max_dist, use_pruning=use_pruning, max_length_diff=max_length_diff, window=window, @@ -1035,8 +1028,10 @@ def warp(from_s, to_s, path=None, **kwargs): def best_path(paths, row=None, col=None, use_max=False): """Compute the optimal path from the nxm warping paths matrix. + :param paths: Warping paths matrix :param row: If given, start from this row (instead of lower-right corner) :param col: If given, start from this column (instead of lower-right corner) + :param use_max: Find maximal path instead of minimal path :return: Array of (row, col) representing the best path """ if use_max: @@ -1101,11 +1096,12 @@ def warping_path_args_to_c(s1, s2, **kwargs): s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) _check_library(raise_exception=True) + def get(key): value = kwargs.get(key, None) if value is None: return 0 return value settings_kwargs = {key: get(key) for key in - ['window', 'max_dist', 'max_step', 'max_length_diff', 'penalty', 'psi']} + ['window', 'max_dist', 'max_step', 'max_length_diff', 'penalty', 'psi']} return s1, s2, settings_kwargs diff --git a/dtaidistance/dtw_ndim.py b/dtaidistance/dtw_ndim.py index 3d226c4c..2c743318 100644 --- a/dtaidistance/dtw_ndim.py +++ b/dtaidistance/dtw_ndim.py @@ -6,7 +6,7 @@ Dynamic Time Warping (DTW) for N-dimensional series. All the functionality in this subpackage is also available in -the dtw subpackage with argument use_ndim. +the dtw subpackage with argument ``use_ndim=True``. :author: Wannes Meert :copyright: Copyright 2017-2024 KU Leuven, DTAI Research Group. @@ -15,52 +15,17 @@ """ import os import logging -import math -import array from . import dtw from . import ed -from .dtw import _check_library, SeriesContainer, _distance_matrix_idxs, distances_array_to_matrix,\ - _distance_matrix_length, _complete_block +from .dtw import SeriesContainer from . import util_numpy from . import innerdistance -from .exceptions import NumpyException - -try: - if util_numpy.test_without_numpy(): - raise ImportError() - import numpy as np - array_min = np.min -except ImportError: - np = None - array_min = min logger = logging.getLogger("be.kuleuven.dtai.distance") dtaidistance_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), os.pardir) -try: - from . import dtw_cc -except ImportError: - # logger.info('C library not available') - dtw_cc = None - -dtw_cc_omp = None -try: - from . import dtw_cc_omp -except ImportError: - logger.debug('DTAIDistance C-OMP library not available') - dtw_cc_omp = None - -try: - from tqdm import tqdm -except ImportError: - logger.info('tqdm library not available') - tqdm = None - - -inf = float("inf") - def ub_euclidean(s1, s2, inner_dist=innerdistance.default): """ Euclidean (dependent) distance between two n-dimensional sequences. Supports different lengths. diff --git a/dtaidistance/exceptions.py b/dtaidistance/exceptions.py index a7a026cd..14d61f35 100644 --- a/dtaidistance/exceptions.py +++ b/dtaidistance/exceptions.py @@ -20,6 +20,11 @@ def __init__(self, message): super().__init__(message) +class CythonException(PackageMissingException): + def __init__(self, message): + super().__init__(message) + + class PyClusteringException(PackageMissingException): def __init__(self, message): super().__init__(message) From f9ea1526e7384f74deaabd2417a3a7e8101d7227 Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 29 Mar 2024 10:12:32 +0100 Subject: [PATCH 19/33] readthedocs --- .readthedocs.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 5c28483a..3d0af717 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,7 +1,5 @@ version: 2 -requirements_file: requirements.txt - build: os: ubuntu-22.04 tools: From d676c7a9eaf8a30b541701ac29ade8812e918cff Mon Sep 17 00:00:00 2001 From: wannesm Date: Fri, 29 Mar 2024 14:17:25 +0100 Subject: [PATCH 20/33] docs --- dtaidistance/dtw.py | 32 ++++++++++++++----- dtaidistance/ed.py | 4 +-- dtaidistance/innerdistance.py | 6 ++-- dtaidistance/preprocessing.py | 1 + dtaidistance/subsequence/subsequencesearch.py | 10 +++--- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index f275514d..67a40fbd 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -108,7 +108,7 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, """Settings for Dynamic Time Warping distance methods. :param window: Only allow for maximal shifts from the two diagonals smaller than this number. - It includes the diagonal, meaning that an Euclidean distance is obtained by setting + It includes the diagonal, meaning that Euclidean distance is obtained by setting ``window=1.`` :param max_dist: Stop if the returned values will be larger than this value :param max_step: Do not allow steps larger than this value @@ -551,7 +551,7 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, gamma=1, tau=0, delta=0, delta_factor=1, use_c=False): """ - Dynamic Time Warping warping paths using an affinity/similarity matrix instead of a distance matrix. + Dynamic Time Warping paths using an affinity/similarity matrix instead of a distance matrix. The full matrix of all warping paths (or accumulated cost matrix) is built. @@ -563,6 +563,11 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, :param penalty: see :meth:`distance` :param psi: see :meth:`distance` :param psi_neg: Replace values that should be skipped because of psi-relaxation with -1. + :param gamma: + :param tau: + :param delta: + :param delta_factor: + :param use_c: :returns: (DTW distance, DTW matrix) """ if use_c: @@ -584,7 +589,6 @@ def warping_paths_affinity(s1, s2, window=None, only_triu=False, dtw[0, i] = 0 for i in range(psi_1b + 1): dtw[i, 0] = 0 - i0 = 1 i1 = 0 for i in range(r): i0 = i @@ -641,10 +645,21 @@ def warping_paths_affinity_fast(s1, s2, window=None, only_triu=False, compact=False, use_ndim=False): """Fast C version of :meth:`warping_paths`. - Additional parameters: - :param compact: Return a compact warping paths matrix. + :param s1: + :param s2: + :param window: + :param only_triu: + :param penalty: + :param psi: + :param psi_neg: + :param gamma: + :param tau: + :param delta: + :param delta_factor: + :param compact: Return a compact warping paths matrix. Size is ((l1 + 1), min(l2 + 1, abs(l1 - l2) + 2*window + 1)). This option is meant for internal use. For more details, see the C code. + :param use_ndim: """ s1 = util_numpy.verify_np_array(s1) s2 = util_numpy.verify_np_array(s2) @@ -657,7 +672,8 @@ def warping_paths_affinity_fast(s1, s2, window=None, only_triu=False, wps_compact = np.full((len(s1)+1, wps_width), -inf) if use_ndim: d = dtw_cc.warping_paths_compact_ndim_affinity(wps_compact, s1, s2, only_triu, - gamma, tau, delta, delta_factor, psi_neg, **settings.c_kwargs()) + gamma, tau, delta, delta_factor, psi_neg, + **settings.c_kwargs()) else: d = dtw_cc.warping_paths_compact_affinity(wps_compact, s1, s2, only_triu, gamma, tau, delta, delta_factor, psi_neg, **settings.c_kwargs()) @@ -911,7 +927,7 @@ def distance_matrix_fast(s, max_dist=None, use_pruning=False, max_length_diff=No fast parallized C version (use_c = True and parallel = True). This method uses the C-compiled version of the DTW algorithm and uses parallelization. - By default this is the OMP C parallelization. If the OMP functionality is not available + By default, this is the OMP C parallelization. If the OMP functionality is not available the parallelization is changed to use Python's multiprocessing library. """ _check_library(raise_exception=True, include_omp=False) @@ -988,7 +1004,7 @@ def warping_path_penalty(s1, s2, penalty_post=0, **kwargs): :param s2: Second sequence :param penalty_post: Penalty to be added after path calculation, for compression/extension - :returns [DTW distance, best path, DTW distance between 2 path elements, DTW matrix] + :returns DTW distance, the best path, DTW distance between 2 path elements, DTW matrix """ dist, paths = warping_paths(s1, s2, **kwargs) path = best_path(paths) diff --git a/dtaidistance/ed.py b/dtaidistance/ed.py index c819d884..8c4b1876 100644 --- a/dtaidistance/ed.py +++ b/dtaidistance/ed.py @@ -6,12 +6,11 @@ Euclidean Distance (ED) :author: Wannes Meert -:copyright: Copyright 2020 KU Leuven, DTAI Research Group. +:copyright: Copyright 2020-2024 KU Leuven, DTAI Research Group. :license: Apache License, Version 2.0, see LICENSE for details. """ import logging -import math from . import util_numpy from . import innerdistance @@ -55,6 +54,7 @@ def distance(s1, s2, inner_dist=innerdistance.default, use_ndim=False): :param s1: Sequence of numbers :param s2: Sequence of numbers :param inner_dist: Inner distance function between two values + :param use_ndim: Use n-dimensional methods :return: Euclidean distance """ idist_fn, result_fn = innerdistance.inner_dist_fns(inner_dist=inner_dist, use_ndim=use_ndim) diff --git a/dtaidistance/innerdistance.py b/dtaidistance/innerdistance.py index 526a8b9c..2b07f1e2 100644 --- a/dtaidistance/innerdistance.py +++ b/dtaidistance/innerdistance.py @@ -16,6 +16,10 @@ from . import util from . import util_numpy +# The available inner distances, with their integer identifier are: +# - 0: squared euclidean +# - 1: euclidean + try: if util_numpy.test_without_numpy(): raise ImportError() @@ -131,7 +135,6 @@ def inner_val(x): def inner_dist_cls(inner_dist="squared euclidean", use_ndim=False): - use_cls = None if inner_dist == "squared euclidean": if use_ndim: use_cls = SquaredEuclideanNdim @@ -163,4 +166,3 @@ def to_c(inner_dist): raise AttributeError('Custom inner distance functions are not supported for the fast C implementation') else: raise AttributeError('Unknown inner_dist: {}'.format(inner_dist)) - diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index e2377431..f31d1b6d 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -28,6 +28,7 @@ def differencing(series, smooth=None, diff_args=None): import numpy as np except ImportError: raise NumpyException("Differencing requires Numpy") + axis = 0 if isinstance(series, np.ndarray): if len(series.shape) == 1: axis = 0 diff --git a/dtaidistance/subsequence/subsequencesearch.py b/dtaidistance/subsequence/subsequencesearch.py index 18f348ff..bed99ca3 100644 --- a/dtaidistance/subsequence/subsequencesearch.py +++ b/dtaidistance/subsequence/subsequencesearch.py @@ -119,12 +119,12 @@ def __init__(self, ss, k=None): def __getitem__(self, key): if isinstance(key, slice): start = 0 if key.start is None else key.start - return [SSMatch(kip+start, self.ss) for kip, (v, i) in + return [SSMatch(kip+start, self.ss) for kip, (_v, _i) in enumerate(self.ss.kbest_distances[key])] return SSMatch(key, self.ss) def __iter__(self): - for ki, (v, i) in enumerate(self.ss.kbest_distances[:self.k]): + for ki, (_v, _i) in enumerate(self.ss.kbest_distances[:self.k]): yield SSMatch(ki, self.ss) def __len__(self): @@ -178,7 +178,7 @@ def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances self.keep_all_distances = keep_all_distances # if self.use_lb and not self.keep_all_distances: - # raise ValueError("If use_lb is true, then keep_all_distances should also be true.") + # raise ValueError("If argument use_lb is true, then keep_all_distances should also be true.") def reset(self): self.distances = None @@ -280,7 +280,7 @@ def kbest_matches(self, k=1): """Return the k best matches. It is recommended to set k to a value, and not None. - If k is set to None, all comparisons are kept and returned. Also no early + If k is set to None, all comparisons are kept and returned. Also, no early stopping is applied in case k is None. :param k: Number of best matches to return (default is 1) @@ -291,7 +291,7 @@ def kbest_matches(self, k=1): # return [SSMatch(best_idx, self) for best_idx in range(len(self.distances))] # if self.keep_all_distances: # best_idxs = np.argpartition(self.distances, k) - # return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] + # return [SSMatch(best_idx, self) for best_idx in best_idxs[:k]] # distances = reversed(sorted(self.h)) # return [SSMatch(best_idx, self) for dist, best_idx in distances] return SSMatches(self) From 2304cd7b23020660182dfc95e148a6e94ec30501 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 9 Apr 2024 14:28:52 +0200 Subject: [PATCH 21/33] docs --- docs/conf.py | 5 +---- docs/modules/subsequence.rst | 4 +++- docs/modules/subsequence/dtw.rst | 5 ----- docs/modules/subsequence/localconcurrences.rst | 5 +++++ docs/modules/subsequence/subsequencealignment.rst | 5 +++++ docs/modules/subsequence/subsequencesearch.rst | 5 +++++ dtaidistance/preprocessing.py | 4 ++++ dtaidistance/subsequence/subsequencesearch.py | 4 ++++ 8 files changed, 27 insertions(+), 10 deletions(-) delete mode 100644 docs/modules/subsequence/dtw.rst create mode 100644 docs/modules/subsequence/localconcurrences.rst create mode 100644 docs/modules/subsequence/subsequencealignment.rst create mode 100644 docs/modules/subsequence/subsequencesearch.rst diff --git a/docs/conf.py b/docs/conf.py index 16b67949..2998ad3b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.mathjax'] + 'sphinx.ext.mathjax'] autoclass_content = 'both' @@ -157,6 +157,3 @@ author, 'DTAIDistance', 'One line description of project.', 'Miscellaneous'), ] - - - diff --git a/docs/modules/subsequence.rst b/docs/modules/subsequence.rst index f1b0ccaa..a503b69f 100644 --- a/docs/modules/subsequence.rst +++ b/docs/modules/subsequence.rst @@ -5,4 +5,6 @@ subsequence .. toctree:: :caption: Subsequence - subsequence/dtw + subsequence/subsequencesearch + subsequence/subsequencealignment + subsequence/localconcurrences diff --git a/docs/modules/subsequence/dtw.rst b/docs/modules/subsequence/dtw.rst deleted file mode 100644 index 1d4ddb84..00000000 --- a/docs/modules/subsequence/dtw.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: dtaidistance.subsequence.dtw - :members: - :undoc-members: - :inherited-members: diff --git a/docs/modules/subsequence/localconcurrences.rst b/docs/modules/subsequence/localconcurrences.rst new file mode 100644 index 00000000..0d75eacd --- /dev/null +++ b/docs/modules/subsequence/localconcurrences.rst @@ -0,0 +1,5 @@ + +.. automodule:: dtaidistance.subsequence.localconcurrences + :members: + :undoc-members: + :inherited-members: diff --git a/docs/modules/subsequence/subsequencealignment.rst b/docs/modules/subsequence/subsequencealignment.rst new file mode 100644 index 00000000..728f4c41 --- /dev/null +++ b/docs/modules/subsequence/subsequencealignment.rst @@ -0,0 +1,5 @@ + +.. automodule:: dtaidistance.subsequence.subsequencealignment + :members: + :undoc-members: + :inherited-members: diff --git a/docs/modules/subsequence/subsequencesearch.rst b/docs/modules/subsequence/subsequencesearch.rst new file mode 100644 index 00000000..60ad6cf0 --- /dev/null +++ b/docs/modules/subsequence/subsequencesearch.rst @@ -0,0 +1,5 @@ + +.. automodule:: dtaidistance.subsequence.subsequencesearch + :members: + :undoc-members: + :inherited-members: diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index f31d1b6d..0f0f4ae0 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -45,6 +45,9 @@ def differencing(series, smooth=None, diff_args=None): def smoothing(series, smooth): """Smooth the series. + Since version 2.4 the filter uses Gustafsson’s method for handling the edges + of the series. + :param series: Time series (must be numpy compatible) :param smooth: Smooth the series by removing the highest frequencies. The cut-off frequency is computed using the `smooth` argument. This @@ -106,6 +109,7 @@ def mixedlinearlogdomain(series, c=10): x if x<=c c+ln(x-c+1) if x>c + :type c: Union[int,list] :param series: Time series (must be numpy compatible) :param c: Switch between linear to log domain at this value, should be <= 1. If two numbers are given as a tuple, the first one is used for positive diff --git a/dtaidistance/subsequence/subsequencesearch.py b/dtaidistance/subsequence/subsequencesearch.py index bed99ca3..85341d9e 100644 --- a/dtaidistance/subsequence/subsequencesearch.py +++ b/dtaidistance/subsequence/subsequencesearch.py @@ -138,6 +138,10 @@ def __str__(self): class SubsequenceSearch: + """ + :type distances: Optional[Iterable] + """ + def __init__(self, query, s, dists_options=None, use_lb=True, keep_all_distances=False, max_dist=None, max_value=None, use_c=None, use_ndim=None): """Search the best matching (subsequence) time series compared to a given time series. From 1775b68c807a6fe2d9074493525c131a3c6458eb Mon Sep 17 00:00:00 2001 From: wannesm Date: Wed, 10 Apr 2024 16:51:18 +0200 Subject: [PATCH 22/33] Symbolization with max_rangefactor --- .../subsequence/subsequencealignment.py | 41 ++++++++++++++++++- dtaidistance/symbolization/alignment.py | 16 +++++--- tests/test_subsequence.py | 14 ++++++- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/dtaidistance/subsequence/subsequencealignment.py b/dtaidistance/subsequence/subsequencealignment.py index 87f420db..95aaba41 100644 --- a/dtaidistance/subsequence/subsequencealignment.py +++ b/dtaidistance/subsequence/subsequencealignment.py @@ -196,10 +196,11 @@ def best_match(self): best_idx = np.argmin(self.matching) return self.get_match(best_idx) - def kbest_matches_fast(self, k=1, overlap=0, minlength=2): + def kbest_matches_fast(self, *args, **kwargs): + """See :meth:`kbest_matches`.""" use_c = self.use_c self.use_c = True - result = self.kbest_matches(k=k, overlap=overlap, minlength=minlength) + result = self.kbest_matches(*args, **kwargs) self.use_c = use_c return result @@ -210,18 +211,54 @@ def kbest_matches(self, k=1, overlap=0, minlength=2, maxlength=None): :param overlap: Matches cannot overlap unless overlap > 0. :param minlength: Minimal length of the matched sequence. If k is set to None, matches with one value can occur if minlength is set to 1. + :param maxlength: Maximal length of the matched sequence. :return: Yield an SAMatch object """ + return self._best_matches(k=k, overlap=overlap, + minlength=minlength, maxlength=maxlength) + + def best_matches_fast(self, *args, **kwargs): + """See :meth:`best_matches`.""" + use_c = self.use_c + self.use_c = True + result = self.best_matches(*args, **kwargs) + self.use_c = use_c + return result + + def best_matches(self, max_rangefactor=2, overlap=0, minlength=2, maxlength=None): + """Yields the next best match. Stops when the current match is larger than + maxrangefactor times the first match. + + :param max_rangefactor: The range between the first (best) match and the last match + can be at most a factor of ``maxrangefactor``. For example, if the first match has + value v_f, then the last match has a value ``v_l < v_f*maxfactorrange``. + :param overlap: Matches cannot overlap unless ``overlap > 0``. + :param minlength: Minimal length of the matched sequence. + :param maxlength: Maximal length of the matched sequence. + :return: + """ + return self._best_matches(k=None, max_rangefactor=max_rangefactor, + overlap=overlap, + minlength=minlength, maxlength=maxlength) + + def _best_matches(self, k=None, overlap=0, minlength=2, maxlength=None, max_rangefactor=None): self.align() matching = np.array(self.matching) maxv = np.ceil(np.max(matching) + 1) matching[:min(len(self.query) - 1, overlap)] = maxv ki = 0 + max_dist = np.inf while k is None or ki < k: best_idx = np.argmin(matching) if best_idx == 0 or np.isinf(matching[best_idx]) or matching[best_idx] == maxv: # No more matches found break + if max_rangefactor is not None: + if ki == 0: + max_dist = matching[best_idx] * max_rangefactor + elif matching[best_idx] > max_dist: + # Remaining matches are larger than a factor of the first match + break match = self.get_match(best_idx) b, e = match.segment cur_overlap = min(overlap, e - b - 1) diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index a9f2f2f1..d0ed5926 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -80,12 +80,15 @@ def align2(self, series): self.symbols = best_patterns return best_patterns - def align(self, series, max_overlap=None): + def align(self, series, max_rangefactor=None, max_overlap=None): """Perform alignment. Only one motif is matched to a subsequence. No aggregation within the same subsequence. :param series: List of time series or a numpy array + :param max_rangefactor: the range between the first (best) match and the last match + can be at most a factor of ``maxrangefactor``. For example, if the first match has + value v_f, then the last match has a value ``v_l < v_f*maxfactorrange``. :param max_overlap: Maximal overlap when matching codewords. If not given, this is based on maxcompression and maxexpansion. """ @@ -103,9 +106,9 @@ def align(self, series, max_overlap=None): for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) sa = subsequence_alignment(medoidd, curseries, use_c=self.use_c) - for match in sa.kbest_matches(k=None, - minlength=math.floor(len(medoidd)*self.maxcompression), - maxlength=math.ceil(len(medoidd)*self.maxexpansion)): + for match in sa.best_matches(max_rangefactor=max_rangefactor, + minlength=math.floor(len(medoidd)*self.maxcompression), + maxlength=math.ceil(len(medoidd)*self.maxexpansion)): patterns.append((midx, match.segment[0], match.segment[1]+1, curseries[match.segment[0]:match.segment[1]+1], match.value)) max_value = max(max_value, match.value) @@ -151,10 +154,11 @@ def align(self, series, max_overlap=None): self.symbols = best_patterns return best_patterns - def align_fast(self, series): + def align_fast(self, *args, **kwargs): + """See :meth:`align`.""" use_c = self.use_c self.use_c = True - result = self.align(series) + result = self.align(*args, **kwargs) self.use_c = use_c return result diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 01a4dc74..8c88ab89 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -12,7 +12,7 @@ from dtaidistance.dtw import lb_keogh from dtaidistance import dtw, dtw_ndim -directory = None +directory = Path(os.environ['TESTDIR']) if 'TESTDIR' in os.environ else None numpyonly = pytest.mark.skipif("util_numpy.test_without_numpy()") @@ -44,6 +44,18 @@ def test_dtw_subseq1(): assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] +@numpyonly +def test_dtw_subseq1_maxrangefactor(): + with util_numpy.test_uses_numpy() as np: + query = np.array([1., 2, 0]) + series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0, 5, 6, 0]) + sa = subsequence_alignment(query, series) + best_k = list(sa.best_matches(max_rangefactor=1.2)) + assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] + best_k = list(sa.best_matches_fast(max_rangefactor=1.2)) + assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] + + @numpyonly def test_dtw_subseq_eeg(): with util_numpy.test_uses_numpy() as np: From c50b95e963462fe8fbc023bbaa016744d53a468a Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 11 Apr 2024 14:40:22 +0200 Subject: [PATCH 23/33] align with knee detection --- .../subsequence/subsequencealignment.py | 38 ++++++++++++++++++- dtaidistance/symbolization/alignment.py | 16 +++++--- dtaidistance/util.py | 30 +++++++++++++++ tests/test_subsequence.py | 13 ++++++- 4 files changed, 89 insertions(+), 8 deletions(-) diff --git a/dtaidistance/subsequence/subsequencealignment.py b/dtaidistance/subsequence/subsequencealignment.py index 95aaba41..e673a614 100644 --- a/dtaidistance/subsequence/subsequencealignment.py +++ b/dtaidistance/subsequence/subsequencealignment.py @@ -238,19 +238,50 @@ def best_matches(self, max_rangefactor=2, overlap=0, minlength=2, maxlength=None :return: """ return self._best_matches(k=None, max_rangefactor=max_rangefactor, + detectknee_alpha=None, overlap=overlap, minlength=minlength, maxlength=maxlength) - def _best_matches(self, k=None, overlap=0, minlength=2, maxlength=None, max_rangefactor=None): + def best_matches_knee_fast(self, *args, **kwargs): + """See :meth:`best_matches_knee`.""" + use_c = self.use_c + self.use_c = True + result = self.best_matches_knee(*args, **kwargs) + self.use_c = use_c + return result + + def best_matches_knee(self, alpha=0.3, overlap=0, minlength=2, maxlength=None): + """Yields the next best match. Stops when the current match is larger than + maxrangefactor times the first match. + + :param alpha: The factor for the exponentially moving average that keeps + track of the curve to detect the knee. The higher, the more sensitive + to recent values (and differences). + :param overlap: Matches cannot overlap unless ``overlap > 0``. + :param minlength: Minimal length of the matched sequence. + :param maxlength: Maximal length of the matched sequence. + :return: + """ + return self._best_matches(k=None, max_rangefactor=None, + detectknee_alpha=alpha, + overlap=overlap, + minlength=minlength, maxlength=maxlength) + + def _best_matches(self, k=None, overlap=0, minlength=2, maxlength=None, + max_rangefactor=None, detectknee_alpha=None): self.align() matching = np.array(self.matching) maxv = np.ceil(np.max(matching) + 1) matching[:min(len(self.query) - 1, overlap)] = maxv ki = 0 max_dist = np.inf + if detectknee_alpha is not None: + dk = util.DetectKnee(alpha=detectknee_alpha) + else: + dk = None while k is None or ki < k: best_idx = np.argmin(matching) - if best_idx == 0 or np.isinf(matching[best_idx]) or matching[best_idx] == maxv: + if np.isinf(matching[best_idx]) or matching[best_idx] == maxv: # No more matches found break if max_rangefactor is not None: @@ -259,6 +290,9 @@ def _best_matches(self, k=None, overlap=0, minlength=2, maxlength=None, max_rang elif matching[best_idx] > max_dist: # Remaining matches are larger than a factor of the first match break + if detectknee_alpha is not None: + if dk.dostop(matching[best_idx]): + break match = self.get_match(best_idx) b, e = match.segment cur_overlap = min(overlap, e - b - 1) diff --git a/dtaidistance/symbolization/alignment.py b/dtaidistance/symbolization/alignment.py index d0ed5926..68ac9c3a 100644 --- a/dtaidistance/symbolization/alignment.py +++ b/dtaidistance/symbolization/alignment.py @@ -80,7 +80,7 @@ def align2(self, series): self.symbols = best_patterns return best_patterns - def align(self, series, max_rangefactor=None, max_overlap=None): + def align(self, series, max_rangefactor=None, detectknee_alpha=None, max_overlap=None): """Perform alignment. Only one motif is matched to a subsequence. No aggregation within the same subsequence. @@ -106,9 +106,15 @@ def align(self, series, max_rangefactor=None, max_overlap=None): for midx in range(len(self.codebook)): medoidd = np.array(self.codebook[midx]) sa = subsequence_alignment(medoidd, curseries, use_c=self.use_c) - for match in sa.best_matches(max_rangefactor=max_rangefactor, - minlength=math.floor(len(medoidd)*self.maxcompression), - maxlength=math.ceil(len(medoidd)*self.maxexpansion)): + if max_rangefactor is not None: + itr = sa.best_matches(max_rangefactor=max_rangefactor, + minlength=math.floor(len(medoidd) * self.maxcompression), + maxlength=math.ceil(len(medoidd) * self.maxexpansion)) + else: + itr = sa.best_matches_knee(alpha=detectknee_alpha, + minlength=math.floor(len(medoidd) * self.maxcompression), + maxlength=math.ceil(len(medoidd) * self.maxexpansion)) + for match in itr: patterns.append((midx, match.segment[0], match.segment[1]+1, curseries[match.segment[0]:match.segment[1]+1], match.value)) max_value = max(max_value, match.value) @@ -198,7 +204,7 @@ def plot(self, series, sequences, sequences_idx, ylabels=None, filename=None, fi if figsize is None: figsize = (12, 8) sc = SeriesContainer(series) - fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex=True, sharey="col", figsize=figsize) + fig, axs = plt.subplots(nrows=len(sc), ncols=1, sharex='all', sharey='col', figsize=figsize) if len(sc) == 1: axs = [axs] for r in range(series.shape[0]): diff --git a/dtaidistance/util.py b/dtaidistance/util.py index 58b5c553..9824fd9d 100644 --- a/dtaidistance/util.py +++ b/dtaidistance/util.py @@ -365,3 +365,33 @@ def argmax(a): imax, vmax = i, v return imax + +class DetectKnee: + def __init__(self, alpha=0.3): + """EWMA based knee detection. + + https://cseweb.ucsd.edu//~snoeren/papers/plush-usenix06.pdf + """ + self.cnt = 0 + self.arrvar_fraction = 4 + self.alpha = alpha + self.arr = None + self.arrvar = None + self.max_thr = None + + def dostop(self, value): + if self.arr is None: + self.arr = value + self.arrvar = 0 + return False + + rvalue = False + self.max_thr = self.arr + self.arrvar_fraction * self.arrvar + # We need to see at least three instances to compute a reasonable arrvar + if self.cnt > 2 and value > self.max_thr: + rvalue = True + + self.arrvar = self.alpha * max(0, value - self.arr) + (1.0 - self.alpha) * self.arrvar + self.arr = self.alpha * value + (1.0 - self.alpha) * self.arr + self.cnt += 1 + return rvalue diff --git a/tests/test_subsequence.py b/tests/test_subsequence.py index 8c88ab89..c24319d9 100644 --- a/tests/test_subsequence.py +++ b/tests/test_subsequence.py @@ -48,7 +48,7 @@ def test_dtw_subseq1(): def test_dtw_subseq1_maxrangefactor(): with util_numpy.test_uses_numpy() as np: query = np.array([1., 2, 0]) - series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0, 5, 6, 0]) + series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) sa = subsequence_alignment(query, series) best_k = list(sa.best_matches(max_rangefactor=1.2)) assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] @@ -56,6 +56,17 @@ def test_dtw_subseq1_maxrangefactor(): assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] +def test_dtw_subseq1_knee(): + with util_numpy.test_uses_numpy() as np: + query = np.array([1., 2, 0]) + series = np.array([1., 0, 1, 2, 1, 0, 2, 0, 3, 0, 0, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + sa = subsequence_alignment(query, series) + best_k = list(sa.best_matches_knee(alpha=0.3)) + assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] + best_k = list(sa.best_matches_knee_fast(alpha=0.3)) + assert [m.segment for m in best_k] == [[2, 4], [5, 7], [0, 1]] + + @numpyonly def test_dtw_subseq_eeg(): with util_numpy.test_uses_numpy() as np: From 44d665485070047e5e976b08952f21f52b18d87f Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 11 Apr 2024 21:50:19 +0200 Subject: [PATCH 24/33] docs --- dtaidistance/util.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/dtaidistance/util.py b/dtaidistance/util.py index 9824fd9d..adde8ef5 100644 --- a/dtaidistance/util.py +++ b/dtaidistance/util.py @@ -368,13 +368,20 @@ def argmax(a): class DetectKnee: def __init__(self, alpha=0.3): - """EWMA based knee detection. + """Exponential Weighted Moving Average (EWMA) based knee detection. + Useful to detect when values start increasing at an increased rate. + + Based on: https://cseweb.ucsd.edu//~snoeren/papers/plush-usenix06.pdf + + :param alpha: EWMA parameter, in [0,1] + Low values prefer old values, high values prefer recent values. """ - self.cnt = 0 + self.cnt = 0 # Number of data points seen + self.min_points = 3 # Minimal number of data points to see before stopping self.arrvar_fraction = 4 - self.alpha = alpha + self.alpha = alpha # EWMA parameter self.arr = None self.arrvar = None self.max_thr = None @@ -387,8 +394,8 @@ def dostop(self, value): rvalue = False self.max_thr = self.arr + self.arrvar_fraction * self.arrvar - # We need to see at least three instances to compute a reasonable arrvar - if self.cnt > 2 and value > self.max_thr: + # We need to see at least min_points instances to compute a reasonable arrvar + if self.cnt >= self.min_points and value > self.max_thr: rvalue = True self.arrvar = self.alpha * max(0, value - self.arr) + (1.0 - self.alpha) * self.arrvar From 940063dd4a0e3e916497b6eb8b02feb03c5da8c3 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 29 Apr 2024 11:59:36 +0200 Subject: [PATCH 25/33] Remove xcode files --- .../DTAIDistanceC.xcodeproj/project.pbxproj | 814 ------------------ .../contents.xcworkspacedata | 7 - .../xcshareddata/IDEWorkspaceChecks.plist | 8 - .../xcschemes/DTAIDistanceC.xcscheme | 78 -- .../xcschemes/DTAIDistanceCBenchmark.xcscheme | 90 -- .../xcschemes/DTAIDistanceCTestDTW.xcscheme | 88 -- .../DTAIDistanceCTestMatrix.xcscheme | 84 -- .../xcdebugger/Breakpoints_v2.xcbkptlist | 24 - .../xcschemes/xcschememanagement.plist | 67 -- 9 files changed, 1260 deletions(-) delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/contents.xcworkspacedata delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceC.xcscheme delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCBenchmark.xcscheme delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestDTW.xcscheme delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestMatrix.xcscheme delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist delete mode 100644 dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcschemes/xcschememanagement.plist diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj deleted file mode 100644 index 208091bf..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.pbxproj +++ /dev/null @@ -1,814 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 50; - objects = { - -/* Begin PBXBuildFile section */ - C043A85424BC69AC00BFCF3E /* dd_ed.c in Sources */ = {isa = PBXBuildFile; fileRef = C043A85324BC69AC00BFCF3E /* dd_ed.c */; }; - C043A85524BC69AC00BFCF3E /* dd_ed.c in Sources */ = {isa = PBXBuildFile; fileRef = C043A85324BC69AC00BFCF3E /* dd_ed.c */; }; - C043A85624BC69AC00BFCF3E /* dd_ed.c in Sources */ = {isa = PBXBuildFile; fileRef = C043A85324BC69AC00BFCF3E /* dd_ed.c */; }; - C043A85724BC69AC00BFCF3E /* dd_ed.c in Sources */ = {isa = PBXBuildFile; fileRef = C043A85324BC69AC00BFCF3E /* dd_ed.c */; }; - C043A85824BC69AC00BFCF3E /* dd_ed.c in Sources */ = {isa = PBXBuildFile; fileRef = C043A85324BC69AC00BFCF3E /* dd_ed.c */; }; - C06FD1D724A652B400892537 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1D624A652B400892537 /* main.c */; }; - C06FD1DF24A652F100892537 /* dd_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1DE24A652F100892537 /* dd_dtw.c */; }; - C06FD1ED24A8E1FD00892537 /* dd_tests_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1E024A8DE1800892537 /* dd_tests_dtw.c */; }; - C06FD1EE24A8E20300892537 /* dd_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1DE24A652F100892537 /* dd_dtw.c */; }; - C06FD1FD24A94A6E00892537 /* dd_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1DE24A652F100892537 /* dd_dtw.c */; }; - C06FD1FE24A94A7100892537 /* dd_benchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1F124A9487C00892537 /* dd_benchmark.c */; }; - C06FD20124A9DB1F00892537 /* dd_dtw_openmp.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */; }; - C06FD20224A9DB1F00892537 /* dd_dtw_openmp.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */; }; - C06FD20324A9DB1F00892537 /* dd_dtw_openmp.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */; }; - C06FD20824AA10F600892537 /* dd_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1DE24A652F100892537 /* dd_dtw.c */; }; - C06FD20924AA10F600892537 /* dd_dtw_openmp.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */; }; - C06FD20A24AA10F600892537 /* dd_tests_matrix.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20424AA0BBC00892537 /* dd_tests_matrix.c */; }; - C0CAB7A824F3C5A000762BE3 /* dd_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = C0CAB7A724F3C5A000762BE3 /* dd_globals.c */; }; - C0CAB7A924F3C5A000762BE3 /* dd_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = C0CAB7A724F3C5A000762BE3 /* dd_globals.c */; }; - C0CAB7AA24F3C5A000762BE3 /* dd_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = C0CAB7A724F3C5A000762BE3 /* dd_globals.c */; }; - C0CAB7AB24F3C5A000762BE3 /* dd_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = C0CAB7A724F3C5A000762BE3 /* dd_globals.c */; }; - C0CAB7AC24F3C5A000762BE3 /* dd_globals.c in Sources */ = {isa = PBXBuildFile; fileRef = C0CAB7A724F3C5A000762BE3 /* dd_globals.c */; }; - C0CD703F24AF4F1F00F1BB3B /* dd_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1DE24A652F100892537 /* dd_dtw.c */; }; - C0CD704024AF4F1F00F1BB3B /* dd_dtw_openmp.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */; }; - C0CD704124AF4F1F00F1BB3B /* dd_tests_matrix.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD20424AA0BBC00892537 /* dd_tests_matrix.c */; }; - C0CD704824AF4F3500F1BB3B /* dd_tests_dtw.c in Sources */ = {isa = PBXBuildFile; fileRef = C06FD1E024A8DE1800892537 /* dd_tests_dtw.c */; }; -/* End PBXBuildFile section */ - -/* Begin PBXCopyFilesBuildPhase section */ - C06FD1D124A652B400892537 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - C06FD1E424A8DF9900892537 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - C06FD1F424A94A2600892537 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - C06FD20D24AA10F600892537 /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; - C0CD704324AF4F1F00F1BB3B /* CopyFiles */ = { - isa = PBXCopyFilesBuildPhase; - buildActionMask = 2147483647; - dstPath = /usr/share/man/man1/; - dstSubfolderSpec = 0; - files = ( - ); - runOnlyForDeploymentPostprocessing = 1; - }; -/* End PBXCopyFilesBuildPhase section */ - -/* Begin PBXFileReference section */ - C01B248429A3B0C00050C980 /* DTAIDistanceCBenchmark.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DTAIDistanceCBenchmark.entitlements; sourceTree = ""; }; - C043A85224BC69AC00BFCF3E /* dd_ed.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_ed.h; sourceTree = ""; }; - C043A85324BC69AC00BFCF3E /* dd_ed.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_ed.c; sourceTree = ""; }; - C043A85924BC6A2D00BFCF3E /* dd_globals.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_globals.h; sourceTree = ""; }; - C06FD1D324A652B400892537 /* DTAIDistanceC */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = DTAIDistanceC; sourceTree = BUILT_PRODUCTS_DIR; }; - C06FD1D624A652B400892537 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = ""; }; - C06FD1DD24A652F100892537 /* dd_dtw.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_dtw.h; sourceTree = ""; }; - C06FD1DE24A652F100892537 /* dd_dtw.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_dtw.c; sourceTree = ""; }; - C06FD1E024A8DE1800892537 /* dd_tests_dtw.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_tests_dtw.c; sourceTree = ""; }; - C06FD1E624A8DF9900892537 /* DTAIDistanceCTestDTW */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = DTAIDistanceCTestDTW; sourceTree = BUILT_PRODUCTS_DIR; }; - C06FD1F124A9487C00892537 /* dd_benchmark.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_benchmark.c; sourceTree = ""; }; - C06FD1F624A94A2600892537 /* DTAIDistanceCBenchmark */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = DTAIDistanceCBenchmark; sourceTree = BUILT_PRODUCTS_DIR; }; - C06FD1FF24A9DB1F00892537 /* dd_dtw_openmp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dd_dtw_openmp.h; sourceTree = ""; }; - C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_dtw_openmp.c; sourceTree = ""; }; - C06FD20424AA0BBC00892537 /* dd_tests_matrix.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_tests_matrix.c; sourceTree = ""; }; - C06FD21124AA10F600892537 /* DTAIDistanceCTestMatrix */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = DTAIDistanceCTestMatrix; sourceTree = BUILT_PRODUCTS_DIR; }; - C0CAB7A724F3C5A000762BE3 /* dd_globals.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = dd_globals.c; sourceTree = ""; }; - C0CD704724AF4F1F00F1BB3B /* DTAIDistanceCTestAll */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = DTAIDistanceCTestAll; sourceTree = BUILT_PRODUCTS_DIR; }; - C0F8ACE728B03310009FAE9C /* jinja */ = {isa = PBXFileReference; lastKnownFileType = folder; path = jinja; sourceTree = ""; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - C06FD1D024A652B400892537 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD1E324A8DF9900892537 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD1F324A94A2600892537 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD20C24AA10F600892537 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C0CD704224AF4F1F00F1BB3B /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - C06FD1CA24A652B400892537 = { - isa = PBXGroup; - children = ( - C01B248429A3B0C00050C980 /* DTAIDistanceCBenchmark.entitlements */, - C06FD1D524A652B400892537 /* DTAIDistanceC */, - C06FD1D424A652B400892537 /* Products */, - ); - sourceTree = ""; - }; - C06FD1D424A652B400892537 /* Products */ = { - isa = PBXGroup; - children = ( - C06FD1D324A652B400892537 /* DTAIDistanceC */, - C06FD1E624A8DF9900892537 /* DTAIDistanceCTestDTW */, - C06FD1F624A94A2600892537 /* DTAIDistanceCBenchmark */, - C06FD21124AA10F600892537 /* DTAIDistanceCTestMatrix */, - C0CD704724AF4F1F00F1BB3B /* DTAIDistanceCTestAll */, - ); - name = Products; - sourceTree = ""; - }; - C06FD1D524A652B400892537 /* DTAIDistanceC */ = { - isa = PBXGroup; - children = ( - C06FD1D624A652B400892537 /* main.c */, - C06FD1DD24A652F100892537 /* dd_dtw.h */, - C06FD1DE24A652F100892537 /* dd_dtw.c */, - C06FD1FF24A9DB1F00892537 /* dd_dtw_openmp.h */, - C06FD20024A9DB1F00892537 /* dd_dtw_openmp.c */, - C06FD1F124A9487C00892537 /* dd_benchmark.c */, - C06FD1E024A8DE1800892537 /* dd_tests_dtw.c */, - C06FD20424AA0BBC00892537 /* dd_tests_matrix.c */, - C043A85224BC69AC00BFCF3E /* dd_ed.h */, - C043A85324BC69AC00BFCF3E /* dd_ed.c */, - C043A85924BC6A2D00BFCF3E /* dd_globals.h */, - C0CAB7A724F3C5A000762BE3 /* dd_globals.c */, - C0F8ACE728B03310009FAE9C /* jinja */, - ); - path = DTAIDistanceC; - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXNativeTarget section */ - C06FD1D224A652B400892537 /* DTAIDistanceC */ = { - isa = PBXNativeTarget; - buildConfigurationList = C06FD1DA24A652B400892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceC" */; - buildPhases = ( - C06FD1CF24A652B400892537 /* Sources */, - C06FD1D024A652B400892537 /* Frameworks */, - C06FD1D124A652B400892537 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = DTAIDistanceC; - productName = DTAIDistance; - productReference = C06FD1D324A652B400892537 /* DTAIDistanceC */; - productType = "com.apple.product-type.tool"; - }; - C06FD1E524A8DF9900892537 /* DTAIDistanceCTestDTW */ = { - isa = PBXNativeTarget; - buildConfigurationList = C06FD1EA24A8DF9900892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestDTW" */; - buildPhases = ( - C06FD1E224A8DF9900892537 /* Sources */, - C06FD1E324A8DF9900892537 /* Frameworks */, - C06FD1E424A8DF9900892537 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = DTAIDistanceCTestDTW; - productName = DTAIDistanceTest; - productReference = C06FD1E624A8DF9900892537 /* DTAIDistanceCTestDTW */; - productType = "com.apple.product-type.tool"; - }; - C06FD1F524A94A2600892537 /* DTAIDistanceCBenchmark */ = { - isa = PBXNativeTarget; - buildConfigurationList = C06FD1FA24A94A2600892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCBenchmark" */; - buildPhases = ( - C06FD1F224A94A2600892537 /* Sources */, - C06FD1F324A94A2600892537 /* Frameworks */, - C06FD1F424A94A2600892537 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = DTAIDistanceCBenchmark; - productName = DTAIDistanceCBenchmark; - productReference = C06FD1F624A94A2600892537 /* DTAIDistanceCBenchmark */; - productType = "com.apple.product-type.tool"; - }; - C06FD20624AA10F600892537 /* DTAIDistanceCTestMatrix */ = { - isa = PBXNativeTarget; - buildConfigurationList = C06FD20E24AA10F600892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestMatrix" */; - buildPhases = ( - C06FD20724AA10F600892537 /* Sources */, - C06FD20C24AA10F600892537 /* Frameworks */, - C06FD20D24AA10F600892537 /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = DTAIDistanceCTestMatrix; - productName = DTAIDistanceTest; - productReference = C06FD21124AA10F600892537 /* DTAIDistanceCTestMatrix */; - productType = "com.apple.product-type.tool"; - }; - C0CD703D24AF4F1F00F1BB3B /* DTAIDistanceCTestAll */ = { - isa = PBXNativeTarget; - buildConfigurationList = C0CD704424AF4F1F00F1BB3B /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestAll" */; - buildPhases = ( - C0CD703E24AF4F1F00F1BB3B /* Sources */, - C0CD704224AF4F1F00F1BB3B /* Frameworks */, - C0CD704324AF4F1F00F1BB3B /* CopyFiles */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = DTAIDistanceCTestAll; - productName = DTAIDistanceTest; - productReference = C0CD704724AF4F1F00F1BB3B /* DTAIDistanceCTestAll */; - productType = "com.apple.product-type.tool"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - C06FD1CB24A652B400892537 /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 1150; - ORGANIZATIONNAME = "Wannes Meert"; - TargetAttributes = { - C06FD1D224A652B400892537 = { - CreatedOnToolsVersion = 11.5; - }; - C06FD1E524A8DF9900892537 = { - CreatedOnToolsVersion = 11.5; - }; - C06FD1F524A94A2600892537 = { - CreatedOnToolsVersion = 11.5; - }; - }; - }; - buildConfigurationList = C06FD1CE24A652B400892537 /* Build configuration list for PBXProject "DTAIDistanceC" */; - compatibilityVersion = "Xcode 9.3"; - developmentRegion = en; - hasScannedForEncodings = 0; - knownRegions = ( - en, - Base, - ); - mainGroup = C06FD1CA24A652B400892537; - productRefGroup = C06FD1D424A652B400892537 /* Products */; - projectDirPath = ""; - projectRoot = ""; - targets = ( - C06FD1D224A652B400892537 /* DTAIDistanceC */, - C06FD1F524A94A2600892537 /* DTAIDistanceCBenchmark */, - C06FD1E524A8DF9900892537 /* DTAIDistanceCTestDTW */, - C06FD20624AA10F600892537 /* DTAIDistanceCTestMatrix */, - C0CD703D24AF4F1F00F1BB3B /* DTAIDistanceCTestAll */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXSourcesBuildPhase section */ - C06FD1CF24A652B400892537 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C06FD1D724A652B400892537 /* main.c in Sources */, - C06FD20124A9DB1F00892537 /* dd_dtw_openmp.c in Sources */, - C0CAB7A824F3C5A000762BE3 /* dd_globals.c in Sources */, - C043A85424BC69AC00BFCF3E /* dd_ed.c in Sources */, - C06FD1DF24A652F100892537 /* dd_dtw.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD1E224A8DF9900892537 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C06FD1EE24A8E20300892537 /* dd_dtw.c in Sources */, - C06FD20224A9DB1F00892537 /* dd_dtw_openmp.c in Sources */, - C0CAB7AA24F3C5A000762BE3 /* dd_globals.c in Sources */, - C043A85624BC69AC00BFCF3E /* dd_ed.c in Sources */, - C06FD1ED24A8E1FD00892537 /* dd_tests_dtw.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD1F224A94A2600892537 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C06FD1FE24A94A7100892537 /* dd_benchmark.c in Sources */, - C06FD20324A9DB1F00892537 /* dd_dtw_openmp.c in Sources */, - C0CAB7A924F3C5A000762BE3 /* dd_globals.c in Sources */, - C043A85524BC69AC00BFCF3E /* dd_ed.c in Sources */, - C06FD1FD24A94A6E00892537 /* dd_dtw.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C06FD20724AA10F600892537 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C06FD20824AA10F600892537 /* dd_dtw.c in Sources */, - C06FD20924AA10F600892537 /* dd_dtw_openmp.c in Sources */, - C0CAB7AB24F3C5A000762BE3 /* dd_globals.c in Sources */, - C043A85724BC69AC00BFCF3E /* dd_ed.c in Sources */, - C06FD20A24AA10F600892537 /* dd_tests_matrix.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - C0CD703E24AF4F1F00F1BB3B /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - C0CD703F24AF4F1F00F1BB3B /* dd_dtw.c in Sources */, - C0CD704024AF4F1F00F1BB3B /* dd_dtw_openmp.c in Sources */, - C0CAB7AC24F3C5A000762BE3 /* dd_globals.c in Sources */, - C0CD704824AF4F3500F1BB3B /* dd_tests_dtw.c in Sources */, - C043A85824BC69AC00BFCF3E /* dd_ed.c in Sources */, - C0CD704124AF4F1F00F1BB3B /* dd_tests_matrix.c in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin XCBuildConfiguration section */ - C06FD1D824A652B400892537 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_ANALYZER_NONNULL = YES; - CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_MODULES = YES; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_ENABLE_OBJC_WEAK = YES; - CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_COMMA = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; - CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; - CLANG_WARN_DOCUMENTATION_COMMENTS = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INFINITE_RECURSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; - CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; - CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; - CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; - CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; - CLANG_WARN_STRICT_PROTOTYPES = YES; - CLANG_WARN_SUSPICIOUS_MOVE = YES; - CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; - CLANG_WARN_UNREACHABLE_CODE = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - DEBUG_INFORMATION_FORMAT = dwarf; - ENABLE_STRICT_OBJC_MSGSEND = YES; - ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; - GCC_DYNAMIC_NO_PIC = NO; - GCC_NO_COMMON_BLOCKS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; - GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 12.0; - MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; - MTL_FAST_MATH = YES; - ONLY_ACTIVE_ARCH = YES; - SDKROOT = macosx; - }; - name = Debug; - }; - C06FD1D924A652B400892537 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_ANALYZER_NONNULL = YES; - CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_MODULES = YES; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_ENABLE_OBJC_WEAK = YES; - CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_COMMA = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; - CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; - CLANG_WARN_DOCUMENTATION_COMMENTS = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INFINITE_RECURSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; - CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; - CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; - CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; - CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; - CLANG_WARN_STRICT_PROTOTYPES = YES; - CLANG_WARN_SUSPICIOUS_MOVE = YES; - CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; - CLANG_WARN_UNREACHABLE_CODE = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; - ENABLE_NS_ASSERTIONS = NO; - ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; - GCC_NO_COMMON_BLOCKS = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; - GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 12.0; - MTL_ENABLE_DEBUG_INFO = NO; - MTL_FAST_MATH = YES; - SDKROOT = macosx; - }; - name = Release; - }; - C06FD1DB24A652B400892537 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = "-fopenmp"; - OTHER_LDFLAGS = "-lomp"; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - C06FD1DC24A652B400892537 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = "-fopenmp"; - OTHER_LDFLAGS = "-lomp"; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; - C06FD1EB24A8DF9900892537 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - GCC_WARN_PEDANTIC = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - C06FD1EC24A8DF9900892537 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - GCC_WARN_PEDANTIC = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; - C06FD1FB24A94A2600892537 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = DTAIDistanceCBenchmark.entitlements; - CODE_SIGN_IDENTITY = ""; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = "-lomp"; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - C06FD1FC24A94A2600892537 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_ENTITLEMENTS = DTAIDistanceCBenchmark.entitlements; - CODE_SIGN_IDENTITY = ""; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "-"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = "-lomp"; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; - C06FD20F24AA10F600892537 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - C06FD21024AA10F600892537 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; - C0CD704524AF4F1F00F1BB3B /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - C0CD704624AF4F1F00F1BB3B /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - CODE_SIGN_IDENTITY = ""; - CODE_SIGN_INJECT_BASE_ENTITLEMENTS = YES; - CODE_SIGN_STYLE = Manual; - DEVELOPMENT_TEAM = 2462T87J45; - ENABLE_HARDENED_RUNTIME = YES; - HEADER_SEARCH_PATHS = ( - /usr/local/include, - /opt/homebrew/include, - ); - LIBRARY_SEARCH_PATHS = ( - /usr/local/lib, - /opt/homebrew/lib/, - ); - OTHER_CFLAGS = ( - "-Xpreprocessor", - "-fopenmp", - ); - OTHER_LDFLAGS = ( - "-lcriterion", - "-lomp", - ); - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - C06FD1CE24A652B400892537 /* Build configuration list for PBXProject "DTAIDistanceC" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C06FD1D824A652B400892537 /* Debug */, - C06FD1D924A652B400892537 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - C06FD1DA24A652B400892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceC" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C06FD1DB24A652B400892537 /* Debug */, - C06FD1DC24A652B400892537 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - C06FD1EA24A8DF9900892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestDTW" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C06FD1EB24A8DF9900892537 /* Debug */, - C06FD1EC24A8DF9900892537 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - C06FD1FA24A94A2600892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCBenchmark" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C06FD1FB24A94A2600892537 /* Debug */, - C06FD1FC24A94A2600892537 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - C06FD20E24AA10F600892537 /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestMatrix" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C06FD20F24AA10F600892537 /* Debug */, - C06FD21024AA10F600892537 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - C0CD704424AF4F1F00F1BB3B /* Build configuration list for PBXNativeTarget "DTAIDistanceCTestAll" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - C0CD704524AF4F1F00F1BB3B /* Debug */, - C0CD704624AF4F1F00F1BB3B /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = C06FD1CB24A652B400892537 /* Project object */; -} diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/contents.xcworkspacedata deleted file mode 100644 index c3c07a88..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/contents.xcworkspacedata +++ /dev/null @@ -1,7 +0,0 @@ - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist deleted file mode 100644 index 18d98100..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +++ /dev/null @@ -1,8 +0,0 @@ - - - - - IDEDidComputeMac32BitWarning - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceC.xcscheme b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceC.xcscheme deleted file mode 100644 index 51511f6f..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceC.xcscheme +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCBenchmark.xcscheme b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCBenchmark.xcscheme deleted file mode 100644 index 55500c30..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCBenchmark.xcscheme +++ /dev/null @@ -1,90 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestDTW.xcscheme b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestDTW.xcscheme deleted file mode 100644 index 3277503a..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestDTW.xcscheme +++ /dev/null @@ -1,88 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestMatrix.xcscheme b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestMatrix.xcscheme deleted file mode 100644 index bd31e4cc..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcshareddata/xcschemes/DTAIDistanceCTestMatrix.xcscheme +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist deleted file mode 100644 index c9010b3a..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - diff --git a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcschemes/xcschememanagement.plist b/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcschemes/xcschememanagement.plist deleted file mode 100644 index 89562952..00000000 --- a/dtaidistance/lib/DTAIDistanceC/DTAIDistanceC.xcodeproj/xcuserdata/wannes.xcuserdatad/xcschemes/xcschememanagement.plist +++ /dev/null @@ -1,67 +0,0 @@ - - - - - SchemeUserState - - DTAIDistanceC.xcscheme_^#shared#^_ - - orderHint - 0 - - DTAIDistanceCBenchmark.xcscheme_^#shared#^_ - - orderHint - 2 - - DTAIDistanceCTestAll.xcscheme_^#shared#^_ - - orderHint - 4 - - DTAIDistanceCTestDTW.xcscheme_^#shared#^_ - - orderHint - 1 - - DTAIDistanceCTestMatrix.xcscheme_^#shared#^_ - - orderHint - 3 - - arm.xcscheme_^#shared#^_ - - orderHint - 5 - - - SuppressBuildableAutocreation - - C06FD1D224A652B400892537 - - primary - - - C06FD1E524A8DF9900892537 - - primary - - - C06FD1F524A94A2600892537 - - primary - - - C06FD20624AA10F600892537 - - primary - - - C0CD703D24AF4F1F00F1BB3B - - primary - - - - - From 6cb11a7d33ff903a7fc830d1f1f786c45dcb0e10 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 29 Apr 2024 12:49:35 +0200 Subject: [PATCH 26/33] setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 29c1159d..46a4c724 100755 --- a/setup.py +++ b/setup.py @@ -499,7 +499,7 @@ def set_setup_kwargs(**kwargs): 'DTAIDistance source': 'https://github.com/wannesm/dtaidistance' }, packages=['dtaidistance', 'dtaidistance.clustering', 'dtaidistance.subsequence', - 'dtaidistance.connectors'], + 'dtaidistance.connectors', 'dtaidistance.symbolization'], python_requires='>=3.5', install_requires=install_requires, setup_requires=setup_requires, From 11c0456e90b27ff6d030e15794cfbf130f3b9567 Mon Sep 17 00:00:00 2001 From: wannesm Date: Sun, 12 May 2024 23:44:03 +0200 Subject: [PATCH 27/33] Add ddtw --- dtaidistance/preprocessing.py | 45 +++++++++++++++++++++++++++++++++++ tests/test_preprocessing.py | 34 +++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index 0f0f4ae0..dff406ea 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -88,6 +88,51 @@ def smoothing(series, smooth): return series +def derivative(series, smooth=None, diff_args=None): + """Derivative series. + + Based on Keogh, E. and Pazzani, M. "Derivative Dynamic Time Warping". + SIAM International Conference on Data Mining, 2002. + + :param series: Time series (must be numpy compatible) + :param smooth: Smooth the derivative series by removing the highest frequencies. + The cut-off frequency is computed using the `smooth` argument. This + fraction (a number between 0.0 and 0.5) of the highest frequencies is + removed. + :param diff_args: Arguments to pass the numpy.diff + :return: Differenced Numpy array of length len(series) - 1 + """ + try: + import numpy as np + except ImportError: + raise NumpyException("Differencing requires Numpy") + axis = 0 + if isinstance(series, np.ndarray): + if len(series.shape) == 1: + axis = 0 + else: + axis = 1 + + if axis == 0: + qim = series[:-2] + qi = series[1:-1] + qip = series[2:] + else: + raise NotImplementedError("Derivative for axis!=0 is not yet implemented") + + seriesd = np.zeros(series.shape[axis]) + seriesd[1:-1] = np.add(np.subtract(qi, qim), (np.subtract(qip, qim) / 2)) / 2 + if axis == 0: + seriesd[0] = series[1] - series[0] + seriesd[-1] = series[-1] - series[-2] + else: + raise NotImplementedError("Derivative for axis!=0 is not yet implemented") + + if smooth is not None: + seriesd = smoothing(seriesd, smooth) + return seriesd + + def logdomain(series): """Transform to the log domain and retain the sign of the signal. diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 337b595f..a9e6ce79 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -6,7 +6,7 @@ import pytest -from dtaidistance.preprocessing import differencing +from dtaidistance.preprocessing import differencing, derivative from dtaidistance import util_numpy logger = logging.getLogger("be.kuleuven.dtai.distance") @@ -31,6 +31,38 @@ def test_differencing(): -0.001698, -0.001238, -0.004681, -0.014869, -0.026607]])) +@numpyonly +@scipyonly +def test_derivative(): + with util_numpy.test_uses_numpy() as np: + def der2(qim, qip): + return (qip-qim) + + def der3(qim, qi, qip): + return ((qi-qim) + ((qip-qim)/2))/2 + + series = np.array([0.1, 0.15, 0.27, 0.3, 0.26, 0.24, 0.2, 0.1]) + seriesds = [der2(*series[0:2])] + \ + [der3(qim, qi, qip) for qim, qi, qip in zip(series[:-2], series[1:-1], series[2:])] + \ + [der2(*series[-2:])] + seriesd = derivative(series, smooth=None) + # import matplotlib.pyplot as plt + # fig, axs = plt.subplots(3, 1) + # axs[0].plot(series) + # for i, d in enumerate(seriesd): + # x = [i-1, i+1] + # y = [series[i]-d, series[i]+d] + # axs[0].plot(x, y, color="green", alpha=0.5) + # d = seriesds[i] + # y = [series[i] - d, series[i] + d] + # axs[0].plot(x, y, color="orange", alpha=0.5) + # axs[1].plot(seriesd) + # axs[2].plot(differencing(series)) + # plt.show() + np.testing.assert_array_almost_equal(seriesd[1:-1], np.array([0.0675, 0.0975, 0.0125, -0.035, -0.025, -0.055])) + np.testing.assert_array_almost_equal(seriesd, seriesds) + + if __name__ == "__main__": logger.setLevel(logging.DEBUG) logger.addHandler(logging.StreamHandler(sys.stdout)) From 662f14f96db8fff29f71b43ee3f0014741915862 Mon Sep 17 00:00:00 2001 From: wannesm Date: Mon, 13 May 2024 11:00:34 +0200 Subject: [PATCH 28/33] docs --- dtaidistance/preprocessing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dtaidistance/preprocessing.py b/dtaidistance/preprocessing.py index dff406ea..eca0218b 100644 --- a/dtaidistance/preprocessing.py +++ b/dtaidistance/preprocessing.py @@ -88,18 +88,21 @@ def smoothing(series, smooth): return series -def derivative(series, smooth=None, diff_args=None): +def derivative(series, smooth=None): """Derivative series. Based on Keogh, E. and Pazzani, M. "Derivative Dynamic Time Warping". SIAM International Conference on Data Mining, 2002. + The smoothing argument is used to smooth after computing the derivative. To apply the + smoothing as explained in Keogh et al. (2002) one should do exponential smoothing before + applying this method. + :param series: Time series (must be numpy compatible) :param smooth: Smooth the derivative series by removing the highest frequencies. The cut-off frequency is computed using the `smooth` argument. This fraction (a number between 0.0 and 0.5) of the highest frequencies is removed. - :param diff_args: Arguments to pass the numpy.diff :return: Differenced Numpy array of length len(series) - 1 """ try: From 5587db72d2aa8ebba197c0ff41f1e1c729efaf16 Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 6 Jun 2024 09:27:20 +0200 Subject: [PATCH 29/33] docs --- dtaidistance/dtw.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 67a40fbd..55e50cf9 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -108,10 +108,17 @@ def __init__(self, window=None, use_pruning=False, max_dist=None, max_step=None, """Settings for Dynamic Time Warping distance methods. :param window: Only allow for maximal shifts from the two diagonals smaller than this number. + The maximally allowed warping, thus difference between indices i in series 1 and j in series 2, + is thus |i-j| < 2*window + |len(s1) - len(s2)|. It includes the diagonal, meaning that Euclidean distance is obtained by setting ``window=1.`` + If the two series are of equal length, this means that the band you see appearing + on the cumulative cost matrix is of width 2*window-1. In other definitions of DTW + this is referred to as the window. :param max_dist: Stop if the returned values will be larger than this value - :param max_step: Do not allow steps larger than this value + :param max_step: Do not allow steps larger than this value. + If the difference between two values in the two series is larger than this, thus + if |s1[i]-s2[j]| > max_step, replace that value with infinity. :param max_length_diff: Return infinity if length of two series is larger :param penalty: Penalty to add if compression or expansion is applied :param psi: Psi relaxation parameter (ignore start and end of matching). From 7ff5ef96b7b23dc5f19550f8b6957bb6e85d3459 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Jun 2024 00:03:41 +0200 Subject: [PATCH 30/33] Bugfix: block triu ignored in dtw_cc_omp in dtw_cc_omp.distance_matrix_ndim --- dtaidistance/clustering/kmeans.py | 2 -- dtaidistance/dtw.py | 2 +- dtaidistance/dtw_cc_omp.pyx | 5 ++++- tests/test_clustering.py | 11 +++++++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dtaidistance/clustering/kmeans.py b/dtaidistance/clustering/kmeans.py index 6af1c252..4f9a1a42 100644 --- a/dtaidistance/clustering/kmeans.py +++ b/dtaidistance/clustering/kmeans.py @@ -184,8 +184,6 @@ def kmeansplusplus_centers(self, series, use_c=False): raise NumpyException("Numpy is required for the KMeans.kmeansplusplus_centers method.") logger.debug('Start K-means++ initialization ... ') ndim = self.series.detected_ndim - print(self.dists_options) - print(dtw_ndim.distance(series[0], series[5], **self.dists_options)) if use_c: if ndim == 1: fn = distance_matrix_fast diff --git a/dtaidistance/dtw.py b/dtaidistance/dtw.py index 55e50cf9..2549146c 100644 --- a/dtaidistance/dtw.py +++ b/dtaidistance/dtw.py @@ -799,7 +799,7 @@ def distance_matrix(s, block=None, compact=False, parallel=False, f'use_c={s.use_c}, dtw_cc_omp={dtw_cc_omp}, use_mp={use_mp}') exp_length = _distance_matrix_length(block, len(s)) - assert len(dists) == exp_length, "len(dists)={} != {}".format(len(dists), exp_length) + assert len(dists) == exp_length, "len(dists)={} != {} (block = {})".format(len(dists), exp_length, block) if compact: return dists diff --git a/dtaidistance/dtw_cc_omp.pyx b/dtaidistance/dtw_cc_omp.pyx index 1436b740..cd2febc1 100644 --- a/dtaidistance/dtw_cc_omp.pyx +++ b/dtaidistance/dtw_cc_omp.pyx @@ -103,15 +103,18 @@ def distance_matrix_ndim(cur, int ndim, block=None, **kwargs): cdef Py_ssize_t block_re=0 cdef Py_ssize_t block_cb=0 cdef Py_ssize_t block_ce=0 + cdef bint block_triu=True cdef Py_ssize_t ri = 0 if block is not None and block != 0.0: block_rb = block[0][0] block_re = block[0][1] block_cb = block[1][0] block_ce = block[1][1] + if len(block) > 2: + block_triu = block[2] settings = DTWSettings(**kwargs) - cdef DTWBlock dtwblock = DTWBlock(rb=block_rb, re=block_re, cb=block_cb, ce=block_ce) + cdef DTWBlock dtwblock = DTWBlock(rb=block_rb, re=block_re, cb=block_cb, ce=block_ce, triu=block_triu) length = distance_matrix_length(dtwblock, len(cur)) # Correct block diff --git a/tests/test_clustering.py b/tests/test_clustering.py index 46200ee7..b6c80cde 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -87,6 +87,17 @@ def test_clustering_tree_ndim(): assert cluster_idx[0] == {0, 1, 2} +@numpyonly +def test_kmeans_ndim(): + with util_numpy.test_uses_numpy() as np: + np.random.seed(seed=3980) + arr = np.random.random((10, 10, 3)) + + model = clustering.kmeans.KMeans(k=2) + cl, p = model.fit(arr, use_c=True) + assert str(cl) == "{0: {1, 2, 4, 6}, 1: {0, 3, 5, 7, 8, 9}}" + + @numpyonly def test_clustering_tree_maxdist(): with util_numpy.test_uses_numpy() as np: From c47785366ff19db8f917ad8a64b4fd569898bbb8 Mon Sep 17 00:00:00 2001 From: wannesm Date: Tue, 11 Jun 2024 00:10:21 +0200 Subject: [PATCH 31/33] Add test --- tests/test_clustering.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_clustering.py b/tests/test_clustering.py index b6c80cde..4e6ee460 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -98,6 +98,17 @@ def test_kmeans_ndim(): assert str(cl) == "{0: {1, 2, 4, 6}, 1: {0, 3, 5, 7, 8, 9}}" +@numpyonly +def test_kmeans_ndim2(): + with util_numpy.test_uses_numpy() as np: + np.random.seed(seed=3980) + arr = np.random.random((10, 10, 3)) + + model = clustering.kmeans.KMeans(k=2) + cl, p = model.fit(arr, use_parallel=False, use_c=True) + assert str(cl) == "{0: {1, 2, 4, 6}, 1: {0, 3, 5, 7, 8, 9}}" + + @numpyonly def test_clustering_tree_maxdist(): with util_numpy.test_uses_numpy() as np: From f54ed4ee91ca555b4965fa32be7de455b99a5f3a Mon Sep 17 00:00:00 2001 From: Nick Seeuws Date: Thu, 13 Jun 2024 15:28:41 +0200 Subject: [PATCH 32/33] Changed path for the homebrew OpenMP library --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 46a4c724..9bcc0472 100755 --- a/setup.py +++ b/setup.py @@ -253,12 +253,14 @@ def print2(*args, **kwargs): l_args['unix'].append(f'-L{p}') l_args['llvm'].append(f'-L{p}') # HomeBrew - p = Path('/opt/homebrew/include') + #p = Path('/opt/homebrew/include') + p = Path('/opt/homebrew/opt/libomp/include') # Location changed if p.exists(): print(f'Adding path to compiler: {p}') c_args['unix'].append(f'-I{p}') c_args['llvm'].append(f'-I{p}') - p = Path('/opt/homebrew/lib') + #p = Path('/opt/homebrew/lib') + p = Path('/opt/homebrew/opt/libomp/lib') # Location changed if p.exists(): libomp = Path(p / 'libomp.a') if self.distribution.forcestatic and platform.system() == 'Darwin' and libomp.exists(): From ecd9e359622a9ee424b4ebeb0e8cab44071dbbf5 Mon Sep 17 00:00:00 2001 From: wannesm Date: Thu, 13 Jun 2024 16:43:25 +0200 Subject: [PATCH 33/33] Bump version --- dtaidistance/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dtaidistance/__init__.py b/dtaidistance/__init__.py index 4e1ed832..e517da9d 100644 --- a/dtaidistance/__init__.py +++ b/dtaidistance/__init__.py @@ -32,7 +32,7 @@ # "then run `cd {};python3 setup.py build_ext --inplace`.".format(dtaidistance_dir)) dtw_cc = None -__version__ = "2.3.11" +__version__ = "2.3.12" __author__ = "Wannes Meert" __copyright__ = "Copyright 2017-2022 KU Leuven, DTAI Research Group" __license__ = "Apache License, Version 2.0"