diff --git a/README.md b/README.md index 26a3e15..5f30963 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,10 @@

-> 📣 TSDB now supports a total of 1️⃣6️⃣5️⃣ time-series datasets ‼️ +> 📣 TSDB now supports a total of 1️⃣6️⃣8️⃣ time-series datasets ‼️ + + +TSDB is part of the [PyPOTS project](https://github.com/WenjieDu/PyPOTS) (a Python toolbox for data mining on Partially-Observed Time Series), and was separated from PyPOTS to decouple datasets from learning algorithms. TSDB is created to help researchers and engineers get rid of data collecting and downloading, and focus back on data processing details. TSDB provides all-in-one-stop convenience for downloading and loading open-source time-series datasets (available datasets listed [below](https://github.com/WenjieDu/TSDB#-list-of-available-datasets)). @@ -80,14 +83,11 @@ That's all. Simple and efficient. Enjoy it! 😃 | [PhysioNet Challenge 2019](dataset_profiles/physionet_2019) | Classification, Imputation | | [Beijing Multi-Site Air-Quality](dataset_profiles/beijing_multisite_air_quality) | Forecasting, Imputation | | [Electricity Load Diagrams](dataset_profiles/electricity_load_diagrams) | Forecasting, Imputation | -| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 160 datasets) | Classification | +| [UCR & UEA Datasets](dataset_profiles/ucr_uea_datasets) (all 163 datasets) | Classification | | [Vessel AIS](dataset_profiles/vessel_ais) | Classification, Forecasting, Imputation | ## ❖ Citing TSDB/PyPOTS - -TSDB is a part of [PyPOTS project](https://github.com/WenjieDu/PyPOTS) (a Python toolbox for data mining on Partially-Observed Time Series), and was separated from PyPOTS for decoupling datasets from learning algorithms. - The paper introducing PyPOTS project is available on arXiv at [this URL](https://arxiv.org/abs/2305.18811), and we are pursuing to publish it in prestigious academic venues, e.g. JMLR (track for [Machine Learning Open Source Software](https://www.jmlr.org/mloss/)). 
If you use TSDB in your work, diff --git a/dataset_profiles/ucr_uea_datasets/README.md b/dataset_profiles/ucr_uea_datasets/README.md index 7d4280f..a8772ed 100644 --- a/dataset_profiles/ucr_uea_datasets/README.md +++ b/dataset_profiles/ucr_uea_datasets/README.md @@ -1,14 +1,22 @@ # UCR & UEA Datasets -## All 160 datasets +## All 163 datasets +128 UCR + 33 UEA + 2 old, removed datasets (NonInvasiveFatalECGThorax1 and NonInvasiveFatalECGThorax2) = 163 + ``` +"ACSF1", "Adiac", +"AllGestureWiimoteX", +"AllGestureWiimoteY", +"AllGestureWiimoteZ", "ArrowHead", "Beef", "BeetleFly", "BirdChicken", +"BME", "Car", "CBF", +"Chinatown", "ChlorineConcentration", "CinCECGTorso", "Coffee", @@ -16,15 +24,22 @@ "CricketX", "CricketY", "CricketZ", +"Crop", "DiatomSizeReduction", "DistalPhalanxOutlineCorrect", "DistalPhalanxOutlineAgeGroup", "DistalPhalanxTW", +"DodgerLoopDay", +"DodgerLoopGame", +"DodgerLoopWeekend", "Earthquakes", "ECG200", "ECG5000", "ECGFiveDays", "ElectricDevices", +"EOGHorizontalSignal", +"EOGVerticalSignal", +"EthanolLevel", "FaceAll", "FaceFour", "FacesUCR", @@ -32,12 +47,26 @@ "Fish", "FordA", "FordB", +"FreezerRegularTrain", +"FreezerSmallTrain", +"Fungi", +"GestureMidAirD1", +"GestureMidAirD2", +"GestureMidAirD3", +"GesturePebbleZ1", +"GesturePebbleZ2", "GunPoint", +"GunPointAgeSpan", +"GunPointMaleVersusFemale", +"GunPointOldVersusYoung", "Ham", "HandOutlines", "Haptics", "Herring", +"HouseTwenty", "InlineSkate", +"InsectEPGRegularTrain", +"InsectEPGSmallTrain", "InsectWingbeatSound", "ItalyPowerDemand", "LargeKitchenAppliances", @@ -46,25 +75,40 @@ "Mallat", "Meat", "MedicalImages", +"MelbournePedestrian", "MiddlePhalanxOutlineCorrect", "MiddlePhalanxOutlineAgeGroup", "MiddlePhalanxTW", +"MixedShapesRegularTrain", +"MixedShapesSmallTrain", "MoteStrain", -"NonInvasiveFatalECGThorax1", -"NonInvasiveFatalECGThorax2", +"NonInvasiveFetalECGThorax1", +"NonInvasiveFetalECGThorax2", "OliveOil", "OSULeaf", "PhalangesOutlinesCorrect", "Phoneme", +"PickupGestureWiimoteZ", +"PigAirwayPressure", 
+"PigArtPressure", +"PigCVP", +"PLAID", "Plane", +"PowerCons", "ProximalPhalanxOutlineCorrect", "ProximalPhalanxOutlineAgeGroup", "ProximalPhalanxTW", "RefrigerationDevices", +"Rock", "ScreenType", +"SemgHandGenderCh2", +"SemgHandMovementCh2", +"SemgHandSubjectCh2", +"ShakeGestureWiimoteZ", "ShapeletSim", "ShapesAll", "SmallKitchenAppliances", +"SmoothSubspace", "SonyAIBORobotSurface1", "SonyAIBORobotSurface2", "StarLightCurves", @@ -77,10 +121,11 @@ "Trace", "TwoLeadECG", "TwoPatterns", +"UMD", +"UWaveGestureLibraryAll", "UWaveGestureLibraryX", "UWaveGestureLibraryY", "UWaveGestureLibraryZ", -"UWaveGestureLibraryAll", "Wafer", "Wine", "WordSynonyms", @@ -88,6 +133,9 @@ "WormsTwoClass", "Yoga", "ArticularyWordRecognition", +"AsphaltObstaclesCoordinates", +"AsphaltPavementTypeCoordinates", +"AsphaltRegularityCoordinates", "AtrialFibrillation", "BasicMotions", "CharacterTrajectories", @@ -110,58 +158,15 @@ "NATOPS", "PenDigits", "PEMS-SF", -"Phoneme", +"PhonemeSpectra", "RacketSports", "SelfRegulationSCP1", "SelfRegulationSCP2", "SpokenArabicDigits", "StandWalkJump", "UWaveGestureLibrary", -"ACSF1", -"AllGestureWiimoteX", -"AllGestureWiimoteY", -"AllGestureWiimoteZ", -"BME", -"Chinatown", -"Crop", -"DodgerLoopDay", -"DodgerLoopGame", -"DodgerLoopWeekend", -"EOGHorizontalSignal", -"EOGVerticalSignal", -"EthanolLevel", -"FreezerRegularTrain", -"FreezerSmallTrain", -"Fungi", -"GestureMidAirD1", -"GestureMidAirD2", -"GestureMidAirD3", -"GesturePebbleZ1", -"GesturePebbleZ2", -"GunPointAgeSpan", -"GunPointMaleVersusFemale", -"GunPointOldVersusYoung", -"HouseTwenty", -"InsectEPGRegularTrain", -"InsectEPGSmallTrain", -"MelbournePedestrian", -"MixedShapesRegularTrain", -"MixedShapesSmallTrain", -"NonInvasiveFetalECGThorax1", -"NonInvasiveFetalECGThorax2", -"PLAID", -"PickupGestureWiimoteZ", -"PigAirwayPressure", -"PigArtPressure", -"PigCVP", -"PowerCons", -"Rock", -"SemgHandGenderCh2", -"SemgHandMovementCh2", -"SemgHandSubjectCh2", -"ShakeGestureWiimoteZ", -"SmoothSubspace", 
-"UMD", +"NonInvasiveFatalECGThorax1", +"NonInvasiveFatalECGThorax2", ``` ## Citing this dataset 🤗 diff --git a/docs/index.rst b/docs/index.rst index 5cf8e8f..28f66e0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -41,7 +41,7 @@ Welcome to TSDB documentation! :alt: Visit num -📣 TSDB now supports a total of 1️⃣6️⃣5️⃣ time-series datasets ‼️ +📣 TSDB now supports a total of 1️⃣6️⃣8️⃣ time-series datasets ‼️ TSDB is created to help researchers and engineers get rid of data collecting and downloading, and focus back on data processing details. TSDB provides all-in-one-stop convenience for downloading and loading open-source time-series datasets (available datasets listed `below <https://github.com/WenjieDu/TSDB#-list-of-available-datasets>`_). @@ -96,7 +96,7 @@ That's all. Simple and efficient. Enjoy it! 😃 `PhysioNet Challenge 2019 <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2019>`_ Classification, Imputation `Beijing Multi-Site Air-Quality <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/beijing_multisite_air_quality>`_ Forecasting, Imputation `Electricity Load Diagrams <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_load_diagrams>`_ Forecasting, Imputation - `UCR & UEA Datasets <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets>`_ (all 160 datasets) Classification + `UCR & UEA Datasets <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets>`_ (all 163 datasets) Classification `Vessel AIS data <https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/vessel_ais>`_ Imputation, Forecasting, Classification =============================================================================================================================== ========================================== diff --git a/tsdb/database.py b/tsdb/database.py index 1b587ef..28e1528 100644 --- a/tsdb/database.py +++ b/tsdb/database.py @@ -10,6 +10,7 @@ CACHED_DATASET_DIR = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets") _DATABASE = { + # http://www.physionet.org/challenge/2012 # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012 "physionet_2012": [ "https://www.physionet.org/files/challenge-2012/1.0.0/set-a.tar.gz", @@ -19,29 +20,40 @@ "https://www.physionet.org/files/challenge-2012/1.0.0/Outcomes-b.txt", "https://www.physionet.org/files/challenge-2012/1.0.0/Outcomes-c.txt", ], + # http://www.physionet.org/challenge/2019 # 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2019 "physionet_2019": [ "https://archive.physionet.org/users/shared/challenge-2019/training_setA.zip", "https://archive.physionet.org/users/shared/challenge-2019/training_setB.zip", ], + # https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014 # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/electricity_load_diagrams "electricity_load_diagrams": "https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip", + # https://archive.ics.uci.edu/ml/datasets/Beijing+Multi-Site+Air-Quality+Data # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/beijing_multisite_air_quality "beijing_multisite_air_quality": "https://archive.ics.uci.edu/ml/machine-learning-databases/00501/" - "PRSA2017_Data_20130301-20170228.zip", + "PRSA2017_Data_20130301-20170228.zip", + # https://zenodo.org/record/8064564 # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/vessel_ais "vessel_ais": "https://zenodo.org/record/8064564/files/parquets.zip", } # https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/ucr_uea_datasets +# 128 UCR + 33 UEA + 2 old, removed datasets (NonInvasiveFatalECGThorax1 and NonInvasiveFatalECGThorax2) = 163 _ucr_uea_datasets = [ + "ACSF1", "Adiac", + "AllGestureWiimoteX", + "AllGestureWiimoteY", + "AllGestureWiimoteZ", "ArrowHead", "Beef", "BeetleFly", "BirdChicken", + "BME", "Car", "CBF", + "Chinatown", "ChlorineConcentration", "CinCECGTorso", "Coffee", @@ -49,15 +61,22 @@ "CricketX", "CricketY", "CricketZ", + "Crop", "DiatomSizeReduction", "DistalPhalanxOutlineCorrect", "DistalPhalanxOutlineAgeGroup", "DistalPhalanxTW", + "DodgerLoopDay", + "DodgerLoopGame", + "DodgerLoopWeekend", "Earthquakes", "ECG200", "ECG5000", "ECGFiveDays", "ElectricDevices", + "EOGHorizontalSignal", + "EOGVerticalSignal", + "EthanolLevel", "FaceAll", "FaceFour", "FacesUCR", @@ -65,12 +84,26 @@ "Fish", "FordA", "FordB", + "FreezerRegularTrain", + "FreezerSmallTrain", + "Fungi", + "GestureMidAirD1", + "GestureMidAirD2", + "GestureMidAirD3", + "GesturePebbleZ1", + "GesturePebbleZ2", "GunPoint", + "GunPointAgeSpan", + "GunPointMaleVersusFemale", + 
"GunPointOldVersusYoung", "Ham", "HandOutlines", "Haptics", "Herring", + "HouseTwenty", "InlineSkate", + "InsectEPGRegularTrain", + "InsectEPGSmallTrain", "InsectWingbeatSound", "ItalyPowerDemand", "LargeKitchenAppliances", @@ -79,25 +112,40 @@ "Mallat", "Meat", "MedicalImages", + "MelbournePedestrian", "MiddlePhalanxOutlineCorrect", "MiddlePhalanxOutlineAgeGroup", "MiddlePhalanxTW", + "MixedShapesRegularTrain", + "MixedShapesSmallTrain", "MoteStrain", - "NonInvasiveFatalECGThorax1", - "NonInvasiveFatalECGThorax2", + "NonInvasiveFetalECGThorax1", + "NonInvasiveFetalECGThorax2", "OliveOil", "OSULeaf", "PhalangesOutlinesCorrect", "Phoneme", + "PickupGestureWiimoteZ", + "PigAirwayPressure", + "PigArtPressure", + "PigCVP", + "PLAID", "Plane", + "PowerCons", "ProximalPhalanxOutlineCorrect", "ProximalPhalanxOutlineAgeGroup", "ProximalPhalanxTW", "RefrigerationDevices", + "Rock", "ScreenType", + "SemgHandGenderCh2", + "SemgHandMovementCh2", + "SemgHandSubjectCh2", + "ShakeGestureWiimoteZ", "ShapeletSim", "ShapesAll", "SmallKitchenAppliances", + "SmoothSubspace", "SonyAIBORobotSurface1", "SonyAIBORobotSurface2", "StarLightCurves", @@ -110,10 +158,11 @@ "Trace", "TwoLeadECG", "TwoPatterns", + "UMD", + "UWaveGestureLibraryAll", "UWaveGestureLibraryX", "UWaveGestureLibraryY", "UWaveGestureLibraryZ", - "UWaveGestureLibraryAll", "Wafer", "Wine", "WordSynonyms", @@ -121,6 +170,9 @@ "WormsTwoClass", "Yoga", "ArticularyWordRecognition", + "AsphaltObstaclesCoordinates", + "AsphaltPavementTypeCoordinates", + "AsphaltRegularityCoordinates", "AtrialFibrillation", "BasicMotions", "CharacterTrajectories", @@ -143,65 +195,22 @@ "NATOPS", "PenDigits", "PEMS-SF", - "Phoneme", + "PhonemeSpectra", "RacketSports", "SelfRegulationSCP1", "SelfRegulationSCP2", "SpokenArabicDigits", "StandWalkJump", "UWaveGestureLibrary", - "ACSF1", - "AllGestureWiimoteX", - "AllGestureWiimoteY", - "AllGestureWiimoteZ", - "BME", - "Chinatown", - "Crop", - "DodgerLoopDay", - "DodgerLoopGame", - 
"DodgerLoopWeekend", - "EOGHorizontalSignal", - "EOGVerticalSignal", - "EthanolLevel", - "FreezerRegularTrain", - "FreezerSmallTrain", - "Fungi", - "GestureMidAirD1", - "GestureMidAirD2", - "GestureMidAirD3", - "GesturePebbleZ1", - "GesturePebbleZ2", - "GunPointAgeSpan", - "GunPointMaleVersusFemale", - "GunPointOldVersusYoung", - "HouseTwenty", - "InsectEPGRegularTrain", - "InsectEPGSmallTrain", - "MelbournePedestrian", - "MixedShapesRegularTrain", - "MixedShapesSmallTrain", - "NonInvasiveFetalECGThorax1", - "NonInvasiveFetalECGThorax2", - "PLAID", - "PickupGestureWiimoteZ", - "PigAirwayPressure", - "PigArtPressure", - "PigCVP", - "PowerCons", - "Rock", - "SemgHandGenderCh2", - "SemgHandMovementCh2", - "SemgHandSubjectCh2", - "ShakeGestureWiimoteZ", - "SmoothSubspace", - "UMD", + "NonInvasiveFatalECGThorax1", + "NonInvasiveFatalECGThorax2", ] UCR_UEA_DATASETS = {} for i in _ucr_uea_datasets: UCR_UEA_DATASETS[ "ucr_uea_" + i - ] = f"https://www.timeseriesclassification.com/aeon-toolkit/{i}.zip" + ] = f"https://www.timeseriesclassification.com/aeon-toolkit/{i}.zip" DATABASE = {**_DATABASE, **UCR_UEA_DATASETS} AVAILABLE_DATASETS = list(DATABASE.keys())