From 9df338d212cdabaaf54f6df4c0ce1d95e3780d91 Mon Sep 17 00:00:00 2001 From: zhujiem Date: Tue, 18 Jul 2023 14:32:00 +0800 Subject: [PATCH] Add citation and license --- CITATION | 14 ++++ LICENSE.md => LICENSE | 3 +- {logs => data/loghub}/Andriod/Andriod_2k.log | 0 .../Andriod/Andriod_2k.log_structured.csv | 0 .../Andriod/Andriod_2k.log_templates.csv | 0 {logs => data/loghub}/Apache/Apache_2k.log | 0 .../Apache/Apache_2k.log_structured.csv | 0 .../Apache/Apache_2k.log_templates.csv | 0 {logs => data/loghub}/BGL/BGL_2k.log | 0 .../loghub}/BGL/BGL_2k.log_structured.csv | 0 .../loghub}/BGL/BGL_2k.log_templates.csv | 0 {logs => data/loghub}/BGL/BGL_templates.csv | 0 {logs => data/loghub}/HDFS/HDFS_2k.log | 0 .../loghub}/HDFS/HDFS_2k.log_structured.csv | 0 .../loghub}/HDFS/HDFS_2k.log_templates.csv | 0 {logs => data/loghub}/HDFS/HDFS_templates.csv | 0 {logs => data/loghub}/HPC/HPC_2k.log | 0 .../loghub}/HPC/HPC_2k.log_structured.csv | 0 .../loghub}/HPC/HPC_2k.log_templates.csv | 0 {logs => data/loghub}/Hadoop/Hadoop_2k.log | 0 .../Hadoop/Hadoop_2k.log_structured.csv | 0 .../Hadoop/Hadoop_2k.log_templates.csv | 0 .../loghub}/HealthApp/HealthApp_2k.log | 0 .../HealthApp/HealthApp_2k.log_structured.csv | 0 .../HealthApp/HealthApp_2k.log_templates.csv | 0 data/loghub/LICENSE | 1 + {logs => data/loghub}/Linux/Linux_2k.log | 0 .../loghub}/Linux/Linux_2k.log_structured.csv | 0 .../loghub}/Linux/Linux_2k.log_templates.csv | 0 {logs => data/loghub}/Mac/Mac.log | 0 {logs => data/loghub}/Mac/Mac_2k.log | 0 .../loghub}/Mac/Mac_2k.log_structured.csv | 0 .../loghub}/Mac/Mac_2k.log_templates.csv | 0 {logs => data/loghub}/OpenSSH/OpenSSH_2k.log | 0 .../OpenSSH/OpenSSH_2k.log_structured.csv | 0 .../OpenSSH/OpenSSH_2k.log_templates.csv | 0 .../loghub}/OpenStack/OpenStack_2k.log | 0 .../OpenStack/OpenStack_2k.log_structured.csv | 0 .../OpenStack/OpenStack_2k.log_templates.csv | 0 .../loghub}/Proxifier/Proxifier_2k.log | 0 .../Proxifier/Proxifier_2k.log_structured.csv | 0 
.../Proxifier/Proxifier_2k.log_templates.csv | 0 data/loghub/README.md | 73 +++++++++++++++++++ {logs => data/loghub}/Spark/Spark_2k.log | 0 .../loghub}/Spark/Spark_2k.log_structured.csv | 0 .../loghub}/Spark/Spark_2k.log_templates.csv | 0 .../loghub}/Thunderbird/Thunderbird_2k.log | 0 .../Thunderbird_2k.log_structured.csv | 0 .../Thunderbird_2k.log_templates.csv | 0 {logs => data/loghub}/Windows/Windows_2k.log | 0 .../Windows/Windows_2k.log_structured.csv | 0 .../Windows/Windows_2k.log_templates.csv | 0 .../loghub}/Zookeeper/Zookeeper_2k.log | 0 .../Zookeeper/Zookeeper_2k.log_structured.csv | 0 .../Zookeeper/Zookeeper_2k.log_templates.csv | 0 {test => tests}/test_py2.sh | 0 {test => tests}/test_py3.sh | 0 57 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 CITATION rename LICENSE.md => LICENSE (96%) rename {logs => data/loghub}/Andriod/Andriod_2k.log (100%) rename {logs => data/loghub}/Andriod/Andriod_2k.log_structured.csv (100%) rename {logs => data/loghub}/Andriod/Andriod_2k.log_templates.csv (100%) rename {logs => data/loghub}/Apache/Apache_2k.log (100%) rename {logs => data/loghub}/Apache/Apache_2k.log_structured.csv (100%) rename {logs => data/loghub}/Apache/Apache_2k.log_templates.csv (100%) rename {logs => data/loghub}/BGL/BGL_2k.log (100%) rename {logs => data/loghub}/BGL/BGL_2k.log_structured.csv (100%) rename {logs => data/loghub}/BGL/BGL_2k.log_templates.csv (100%) rename {logs => data/loghub}/BGL/BGL_templates.csv (100%) rename {logs => data/loghub}/HDFS/HDFS_2k.log (100%) rename {logs => data/loghub}/HDFS/HDFS_2k.log_structured.csv (100%) rename {logs => data/loghub}/HDFS/HDFS_2k.log_templates.csv (100%) rename {logs => data/loghub}/HDFS/HDFS_templates.csv (100%) rename {logs => data/loghub}/HPC/HPC_2k.log (100%) rename {logs => data/loghub}/HPC/HPC_2k.log_structured.csv (100%) rename {logs => data/loghub}/HPC/HPC_2k.log_templates.csv (100%) rename {logs => data/loghub}/Hadoop/Hadoop_2k.log (100%) rename {logs => 
data/loghub}/Hadoop/Hadoop_2k.log_structured.csv (100%) rename {logs => data/loghub}/Hadoop/Hadoop_2k.log_templates.csv (100%) rename {logs => data/loghub}/HealthApp/HealthApp_2k.log (100%) rename {logs => data/loghub}/HealthApp/HealthApp_2k.log_structured.csv (100%) rename {logs => data/loghub}/HealthApp/HealthApp_2k.log_templates.csv (100%) create mode 100644 data/loghub/LICENSE rename {logs => data/loghub}/Linux/Linux_2k.log (100%) rename {logs => data/loghub}/Linux/Linux_2k.log_structured.csv (100%) rename {logs => data/loghub}/Linux/Linux_2k.log_templates.csv (100%) rename {logs => data/loghub}/Mac/Mac.log (100%) rename {logs => data/loghub}/Mac/Mac_2k.log (100%) rename {logs => data/loghub}/Mac/Mac_2k.log_structured.csv (100%) rename {logs => data/loghub}/Mac/Mac_2k.log_templates.csv (100%) rename {logs => data/loghub}/OpenSSH/OpenSSH_2k.log (100%) rename {logs => data/loghub}/OpenSSH/OpenSSH_2k.log_structured.csv (100%) rename {logs => data/loghub}/OpenSSH/OpenSSH_2k.log_templates.csv (100%) rename {logs => data/loghub}/OpenStack/OpenStack_2k.log (100%) rename {logs => data/loghub}/OpenStack/OpenStack_2k.log_structured.csv (100%) rename {logs => data/loghub}/OpenStack/OpenStack_2k.log_templates.csv (100%) rename {logs => data/loghub}/Proxifier/Proxifier_2k.log (100%) rename {logs => data/loghub}/Proxifier/Proxifier_2k.log_structured.csv (100%) rename {logs => data/loghub}/Proxifier/Proxifier_2k.log_templates.csv (100%) create mode 100644 data/loghub/README.md rename {logs => data/loghub}/Spark/Spark_2k.log (100%) rename {logs => data/loghub}/Spark/Spark_2k.log_structured.csv (100%) rename {logs => data/loghub}/Spark/Spark_2k.log_templates.csv (100%) rename {logs => data/loghub}/Thunderbird/Thunderbird_2k.log (100%) rename {logs => data/loghub}/Thunderbird/Thunderbird_2k.log_structured.csv (100%) rename {logs => data/loghub}/Thunderbird/Thunderbird_2k.log_templates.csv (100%) rename {logs => data/loghub}/Windows/Windows_2k.log (100%) rename {logs => 
data/loghub}/Windows/Windows_2k.log_structured.csv (100%) rename {logs => data/loghub}/Windows/Windows_2k.log_templates.csv (100%) rename {logs => data/loghub}/Zookeeper/Zookeeper_2k.log (100%) rename {logs => data/loghub}/Zookeeper/Zookeeper_2k.log_structured.csv (100%) rename {logs => data/loghub}/Zookeeper/Zookeeper_2k.log_templates.csv (100%) rename {test => tests}/test_py2.sh (100%) rename {test => tests}/test_py3.sh (100%) diff --git a/CITATION b/CITATION new file mode 100644 index 00000000..506b041e --- /dev/null +++ b/CITATION @@ -0,0 +1,14 @@ +@inproceedings{logparser, + author = {Jieming Zhu and + Shilin He and + Jinyang Liu and + Pinjia He and + Qi Xie and + Zibin Zheng and + Michael R. Lyu}, + title = {Tools and benchmarks for automated log parsing}, + booktitle = {Proceedings of the 41st International Conference on Software Engineering: + Software Engineering in Practice (ICSE)}, + pages = {121--130}, + publisher = {{IEEE} / {ACM}}, + year = {2019}} \ No newline at end of file diff --git a/LICENSE.md b/LICENSE similarity index 96% rename from LICENSE.md rename to LICENSE index a4a9242b..853c895d 100644 --- a/LICENSE.md +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 LogPAI +Copyright (c) 2018-2023 LOGPAI Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,3 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/logs/Andriod/Andriod_2k.log b/data/loghub/Andriod/Andriod_2k.log similarity index 100% rename from logs/Andriod/Andriod_2k.log rename to data/loghub/Andriod/Andriod_2k.log diff --git a/logs/Andriod/Andriod_2k.log_structured.csv b/data/loghub/Andriod/Andriod_2k.log_structured.csv similarity index 100% rename from logs/Andriod/Andriod_2k.log_structured.csv rename to data/loghub/Andriod/Andriod_2k.log_structured.csv diff --git a/logs/Andriod/Andriod_2k.log_templates.csv b/data/loghub/Andriod/Andriod_2k.log_templates.csv similarity index 100% rename from logs/Andriod/Andriod_2k.log_templates.csv rename to data/loghub/Andriod/Andriod_2k.log_templates.csv diff --git a/logs/Apache/Apache_2k.log b/data/loghub/Apache/Apache_2k.log similarity index 100% rename from logs/Apache/Apache_2k.log rename to data/loghub/Apache/Apache_2k.log diff --git a/logs/Apache/Apache_2k.log_structured.csv b/data/loghub/Apache/Apache_2k.log_structured.csv similarity index 100% rename from logs/Apache/Apache_2k.log_structured.csv rename to data/loghub/Apache/Apache_2k.log_structured.csv diff --git a/logs/Apache/Apache_2k.log_templates.csv b/data/loghub/Apache/Apache_2k.log_templates.csv similarity index 100% rename from logs/Apache/Apache_2k.log_templates.csv rename to data/loghub/Apache/Apache_2k.log_templates.csv diff --git a/logs/BGL/BGL_2k.log b/data/loghub/BGL/BGL_2k.log similarity index 100% rename from logs/BGL/BGL_2k.log rename to data/loghub/BGL/BGL_2k.log diff --git a/logs/BGL/BGL_2k.log_structured.csv b/data/loghub/BGL/BGL_2k.log_structured.csv similarity index 100% rename from logs/BGL/BGL_2k.log_structured.csv rename to data/loghub/BGL/BGL_2k.log_structured.csv diff --git a/logs/BGL/BGL_2k.log_templates.csv b/data/loghub/BGL/BGL_2k.log_templates.csv similarity index 100% rename from logs/BGL/BGL_2k.log_templates.csv rename to data/loghub/BGL/BGL_2k.log_templates.csv diff --git a/logs/BGL/BGL_templates.csv b/data/loghub/BGL/BGL_templates.csv similarity index 100% rename 
from logs/BGL/BGL_templates.csv rename to data/loghub/BGL/BGL_templates.csv diff --git a/logs/HDFS/HDFS_2k.log b/data/loghub/HDFS/HDFS_2k.log similarity index 100% rename from logs/HDFS/HDFS_2k.log rename to data/loghub/HDFS/HDFS_2k.log diff --git a/logs/HDFS/HDFS_2k.log_structured.csv b/data/loghub/HDFS/HDFS_2k.log_structured.csv similarity index 100% rename from logs/HDFS/HDFS_2k.log_structured.csv rename to data/loghub/HDFS/HDFS_2k.log_structured.csv diff --git a/logs/HDFS/HDFS_2k.log_templates.csv b/data/loghub/HDFS/HDFS_2k.log_templates.csv similarity index 100% rename from logs/HDFS/HDFS_2k.log_templates.csv rename to data/loghub/HDFS/HDFS_2k.log_templates.csv diff --git a/logs/HDFS/HDFS_templates.csv b/data/loghub/HDFS/HDFS_templates.csv similarity index 100% rename from logs/HDFS/HDFS_templates.csv rename to data/loghub/HDFS/HDFS_templates.csv diff --git a/logs/HPC/HPC_2k.log b/data/loghub/HPC/HPC_2k.log similarity index 100% rename from logs/HPC/HPC_2k.log rename to data/loghub/HPC/HPC_2k.log diff --git a/logs/HPC/HPC_2k.log_structured.csv b/data/loghub/HPC/HPC_2k.log_structured.csv similarity index 100% rename from logs/HPC/HPC_2k.log_structured.csv rename to data/loghub/HPC/HPC_2k.log_structured.csv diff --git a/logs/HPC/HPC_2k.log_templates.csv b/data/loghub/HPC/HPC_2k.log_templates.csv similarity index 100% rename from logs/HPC/HPC_2k.log_templates.csv rename to data/loghub/HPC/HPC_2k.log_templates.csv diff --git a/logs/Hadoop/Hadoop_2k.log b/data/loghub/Hadoop/Hadoop_2k.log similarity index 100% rename from logs/Hadoop/Hadoop_2k.log rename to data/loghub/Hadoop/Hadoop_2k.log diff --git a/logs/Hadoop/Hadoop_2k.log_structured.csv b/data/loghub/Hadoop/Hadoop_2k.log_structured.csv similarity index 100% rename from logs/Hadoop/Hadoop_2k.log_structured.csv rename to data/loghub/Hadoop/Hadoop_2k.log_structured.csv diff --git a/logs/Hadoop/Hadoop_2k.log_templates.csv b/data/loghub/Hadoop/Hadoop_2k.log_templates.csv similarity index 100% rename from 
logs/Hadoop/Hadoop_2k.log_templates.csv rename to data/loghub/Hadoop/Hadoop_2k.log_templates.csv diff --git a/logs/HealthApp/HealthApp_2k.log b/data/loghub/HealthApp/HealthApp_2k.log similarity index 100% rename from logs/HealthApp/HealthApp_2k.log rename to data/loghub/HealthApp/HealthApp_2k.log diff --git a/logs/HealthApp/HealthApp_2k.log_structured.csv b/data/loghub/HealthApp/HealthApp_2k.log_structured.csv similarity index 100% rename from logs/HealthApp/HealthApp_2k.log_structured.csv rename to data/loghub/HealthApp/HealthApp_2k.log_structured.csv diff --git a/logs/HealthApp/HealthApp_2k.log_templates.csv b/data/loghub/HealthApp/HealthApp_2k.log_templates.csv similarity index 100% rename from logs/HealthApp/HealthApp_2k.log_templates.csv rename to data/loghub/HealthApp/HealthApp_2k.log_templates.csv diff --git a/data/loghub/LICENSE b/data/loghub/LICENSE new file mode 100644 index 00000000..27b09ff7 --- /dev/null +++ b/data/loghub/LICENSE @@ -0,0 +1 @@ +The datasets are freely available for research or academic work, subject to the following conditions: Any usage or distribution of the loghub datasets shall [cite the paper](https://github.com/logpai/loghub/blob/master/CITATION) or refer to the repository https://github.com/logpai/loghub. 
\ No newline at end of file diff --git a/logs/Linux/Linux_2k.log b/data/loghub/Linux/Linux_2k.log similarity index 100% rename from logs/Linux/Linux_2k.log rename to data/loghub/Linux/Linux_2k.log diff --git a/logs/Linux/Linux_2k.log_structured.csv b/data/loghub/Linux/Linux_2k.log_structured.csv similarity index 100% rename from logs/Linux/Linux_2k.log_structured.csv rename to data/loghub/Linux/Linux_2k.log_structured.csv diff --git a/logs/Linux/Linux_2k.log_templates.csv b/data/loghub/Linux/Linux_2k.log_templates.csv similarity index 100% rename from logs/Linux/Linux_2k.log_templates.csv rename to data/loghub/Linux/Linux_2k.log_templates.csv diff --git a/logs/Mac/Mac.log b/data/loghub/Mac/Mac.log similarity index 100% rename from logs/Mac/Mac.log rename to data/loghub/Mac/Mac.log diff --git a/logs/Mac/Mac_2k.log b/data/loghub/Mac/Mac_2k.log similarity index 100% rename from logs/Mac/Mac_2k.log rename to data/loghub/Mac/Mac_2k.log diff --git a/logs/Mac/Mac_2k.log_structured.csv b/data/loghub/Mac/Mac_2k.log_structured.csv similarity index 100% rename from logs/Mac/Mac_2k.log_structured.csv rename to data/loghub/Mac/Mac_2k.log_structured.csv diff --git a/logs/Mac/Mac_2k.log_templates.csv b/data/loghub/Mac/Mac_2k.log_templates.csv similarity index 100% rename from logs/Mac/Mac_2k.log_templates.csv rename to data/loghub/Mac/Mac_2k.log_templates.csv diff --git a/logs/OpenSSH/OpenSSH_2k.log b/data/loghub/OpenSSH/OpenSSH_2k.log similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log rename to data/loghub/OpenSSH/OpenSSH_2k.log diff --git a/logs/OpenSSH/OpenSSH_2k.log_structured.csv b/data/loghub/OpenSSH/OpenSSH_2k.log_structured.csv similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log_structured.csv rename to data/loghub/OpenSSH/OpenSSH_2k.log_structured.csv diff --git a/logs/OpenSSH/OpenSSH_2k.log_templates.csv b/data/loghub/OpenSSH/OpenSSH_2k.log_templates.csv similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log_templates.csv rename to 
data/loghub/OpenSSH/OpenSSH_2k.log_templates.csv diff --git a/logs/OpenStack/OpenStack_2k.log b/data/loghub/OpenStack/OpenStack_2k.log similarity index 100% rename from logs/OpenStack/OpenStack_2k.log rename to data/loghub/OpenStack/OpenStack_2k.log diff --git a/logs/OpenStack/OpenStack_2k.log_structured.csv b/data/loghub/OpenStack/OpenStack_2k.log_structured.csv similarity index 100% rename from logs/OpenStack/OpenStack_2k.log_structured.csv rename to data/loghub/OpenStack/OpenStack_2k.log_structured.csv diff --git a/logs/OpenStack/OpenStack_2k.log_templates.csv b/data/loghub/OpenStack/OpenStack_2k.log_templates.csv similarity index 100% rename from logs/OpenStack/OpenStack_2k.log_templates.csv rename to data/loghub/OpenStack/OpenStack_2k.log_templates.csv diff --git a/logs/Proxifier/Proxifier_2k.log b/data/loghub/Proxifier/Proxifier_2k.log similarity index 100% rename from logs/Proxifier/Proxifier_2k.log rename to data/loghub/Proxifier/Proxifier_2k.log diff --git a/logs/Proxifier/Proxifier_2k.log_structured.csv b/data/loghub/Proxifier/Proxifier_2k.log_structured.csv similarity index 100% rename from logs/Proxifier/Proxifier_2k.log_structured.csv rename to data/loghub/Proxifier/Proxifier_2k.log_structured.csv diff --git a/logs/Proxifier/Proxifier_2k.log_templates.csv b/data/loghub/Proxifier/Proxifier_2k.log_templates.csv similarity index 100% rename from logs/Proxifier/Proxifier_2k.log_templates.csv rename to data/loghub/Proxifier/Proxifier_2k.log_templates.csv diff --git a/data/loghub/README.md b/data/loghub/README.md new file mode 100644 index 00000000..1ddb32b7 --- /dev/null +++ b/data/loghub/README.md @@ -0,0 +1,73 @@ +

+ +# Loghub +Loghub maintains a collection of system logs, which are freely accessible for research purposes. Some of the logs are production data released from previous studies, while some others are collected from real systems in our lab environment. Wherever possible, the logs are NOT sanitized, anonymized or modified in any way. These log datasets are freely available for research or academic work. + +**Logs currently available**: + +| Dataset | Description | Labeled | Time Span | #Lines | Data Size | +| :---------------------------- | :--------| :--------: | --------: | ---------: | ------: | +|:open_file_folder: **Distributed systems**| +| [HDFS_v1](./HDFS#hdfs_v1) | Hadoop distributed file system log | :heavy_check_mark: | 38.7 hours | 11,175,629 | 1.47GB | +| [HDFS_v2](./HDFS#hdfs_v2) | Hadoop distributed file system log| | N.A. | 71,118,073 | 16.06GB | +| [Hadoop](./Hadoop) | Hadoop mapreduce job log | :heavy_check_mark: | N.A. | 394,308 | 48.61MB | +| [Spark](./Spark) | Spark job log || N.A. | 33,236,604 | 2.75GB | +| [Zookeeper](./Zookeeper) | ZooKeeper service log | | 26.7 days | 74,380 | 9.95MB | +| [OpenStack](./OpenStack) | OpenStack infrastructure log | :heavy_check_mark: | N.A. | 207,820 | 58.61MB | +|:open_file_folder: **Super computers**| +| [BGL](./BGL) | Blue Gene/L supercomputer log | :heavy_check_mark: | 214.7 days | 4,747,963 | 708.76MB | +| [HPC](./HPC) | High performance cluster log | | N.A. | 433,489 | 32.00MB | +| [Thunderbird](./Thunderbird) | Thunderbird supercomputer log | :heavy_check_mark: | 244 days | 211,212,192 | 29.60GB | +|:open_file_folder: **Operating systems**| +| [Windows](./Windows) | Windows event log | | 226.7 days | 114,608,388 | 26.09GB | +| [Linux](./Linux) | Linux system log | | 263.9 days | 25,567 | 2.25MB | +| [Mac](./Mac) | Mac OS log | | 7.0 days | 117,283 | 16.09MB | +|:open_file_folder: **Mobile systems**| +| [Android](./Android) | Android framework log | | N.A. 
| 1,555,005 | 183.37MB | +| [HealthApp](./HealthApp) | Health app log | | 10.5 days | 253,395 | 22.44MB | +|:open_file_folder: **Server applications**| +| [Apache](./Apache) | Apache web server error log | | 263.9 days | 56,481 | 4.90MB | +| [OpenSSH](./OpenSSH) | OpenSSH server log | | 28.4 days | 655,146 | 70.02MB | +|:open_file_folder: **Standalone software**| +| [Proxifier](./Proxifier) | Proxifier software log | | N.A. | 21,329 | 2.42MB | + + +### Datasets download +We host only a small sample (2k lines) of each log dataset on GitHub. If you are interested in these datasets, please download [the raw logs at Zenodo](https://doi.org/10.5281/zenodo.1144100). + +:telescope: We proudly announce that the loghub datasets have been downloaded [**48000+**](https://zenodo.org/record/3227177) times by [**380+ organizations**](https://github.com/logpai/loghub/wiki/Loghub) from both industry and academia. + + +### Citation +:bell: If you use the loghub datasets in your work, please cite the following paper. ++ Shilin He, Jieming Zhu, Pinjia He, Michael R. Lyu. [Loghub: A Large Collection of System Log Datasets towards Automated Log Analytics](https://arxiv.org/abs/2008.06448). *arXiv*, 2020. + + +### Publications using loghub datasets + +| Publication | Paper Title | +| :----: | :---- | +| DSN'07 | Adam J. Oliner, Jon Stearley. [What Supercomputers Say: A Study of Five System Logs](http://ieeexplore.ieee.org/document/4273008/). IEEE/IFIP International Conference on Dependable Systems and Networks (DSN), 2007. | +| SOSP'09 | Wei Xu, Ling Huang, Armando Fox, David A. Patterson, Michael I. Jordan. [Detecting Large-Scale System Problems by Mining Console Logs](https://www.sigops.org/sosp/sosp09/papers/xu-sosp09.pdf). ACM Symposium on Operating Systems Principles (SOSP), 2009. | +| KDD'09 | Adetokunbo Makanju, A. Nur Zincir-Heywood, Evangelos E. Milios. 
[Clustering Event Logs Using Iterative Partitioning](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.503.7668&rep=rep1&type=pdf). ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), 2009. | +| ISSRE'16 | Shilin He, Jieming Zhu, Pinjia He, Michael R. Lyu. [Experience Report: System Log Analysis for Anomaly Detection](https://jiemingzhu.github.io/pub/slhe_issre2016.pdf). IEEE International Symposium on Software Reliability Engineering (ISSRE), 2016. | +| DSN'16 | Pinjia He, Jieming Zhu, Shilin He, Jian Li, Michael R. Lyu. [An Evaluation Study on Log Parsing and Its Use in Log Mining](https://jiemingzhu.github.io/pub/pjhe_dsn2016.pdf). IEEE/IFIP International Conference on Dependable Systems and Networks (DSN), 2016. | +| ICSE'16 | Qingwei Lin, Hongyu Zhang, Jian-Guang Lou, Yu Zhang, Xuewei Chen. [Log Clustering Based Problem Identification for Online Service Systems](http://ieeexplore.ieee.org/document/7883294/). International Conference on Software Engineering (ICSE), 2016. | +| ICWS'17 | Pinjia He, Jieming Zhu, Zibin Zheng, Michael R. Lyu. [Drain: An Online Log Parsing Approach with Fixed Depth Tree](https://jiemingzhu.github.io/pub/pjhe_icws2017.pdf). IEEE International Conference on Web Services (ICWS), 2017. | +| CCS'17 | Min Du, Feifei Li, Guineng Zheng, Vivek Srikumar. [DeepLog: Anomaly Detection and Diagnosis from System Logs through Deep Learning](https://acmccs.github.io/papers/p1285-duA.pdf). ACM Conference on Computer and Communications Security (CCS), 2017. | +| TDSC'18 | Pinjia He, Jieming Zhu, Shilin He, Jian Li, Michael R. Lyu. [Towards Automated Log Parsing for Large-Scale Log Data Analysis](https://ieeexplore.ieee.org/document/8067504). IEEE Transactions on Dependable and Secure Computing (TDSC), 2018. | +| TKDE'18 | Min Du, Feifei Li. [Spell: Online Streaming Parsing of Large Unstructured System Logs](https://ieeexplore.ieee.org/abstract/document/8489912). 
IEEE Transactions on Knowledge and Data Engineering (TKDE), 2018. | +| ASE'19 | Jinyang Liu, Jieming Zhu, Shilin He, Pinjia He, Zibin Zheng, Michael R. Lyu. [Logzip: Extracting Hidden Structures via Iterative Clustering for Log Compression](). To appear in IEEE/ACM International Conference on Automated Software Engineering (ASE), 2019. | +| ICSE'19 | Jieming Zhu, Shilin He, Jinyang Liu, Pinjia He, Qi Xie, Zibin Zheng, Michael R. Lyu. [Tools and Benchmarks for Automated Log Parsing](https://arxiv.org/pdf/1811.03509.pdf). International Conference on Software Engineering (ICSE), 2019. | +| ICSE'22 | Zanis Ali Khan, Donghwan Shin, Domenico Bianculli, Lionel Briand. [Guidelines for Assessing the Accuracy of Log Message Template Identification Techniques](https://dl.acm.org/doi/pdf/10.1145/3510003.3510101). International Conference on Software Engineering (ICSE), 2022. | +| ICSE'23 | Van-Hoang Le, Hongyu Zhang. [Log Parsing with Prompt-based Few-shot Learning](https://arxiv.org/abs/2302.07435). International Conference on Software Engineering (ICSE), 2023. | +| ICSE'23 | Zhenhao Li, Chuan Luo, Tse-Hsun Chen, Weiyi Shang, Shilin He, Qingwei Lin, Dongmei Zhang. [Did We Miss Something Important? Studying and Exploring Variable-Aware Log Abstraction](https://arxiv.org/abs/2304.11391). International Conference on Software Engineering (ICSE), 2023. | +| ICSE'23 | Yintong Huo, Yuxin Su, Cheryl Lee, Michael R. Lyu. [SemParser: A Semantic Parser for Log Analysis](https://arxiv.org/abs/2112.12636). International Conference on Software Engineering (ICSE), 2023. | +| WWW'23 | Liming Wang, Hong Xie, Ye Li, Jian Tan, John C.S. Lui. [Interactive Log Parsing via Light-weight User Feedback](https://arxiv.org/abs/2301.12225). ACM Web Conference, 2023. | +| TSC'23 | Siyu Yu, Pinjia He, Ningjiang Chen, Yifan Wu. [Brain: Log Parsing with Bidirectional Parallel Tree](https://ieeexplore.ieee.org/document/10109145). IEEE Transactions on Services Computing (TSC), 2023. 
| + +### Discussion +For any questions or feedback, please [open a discussion here](https://github.com/logpai/loghub/discussions/new/choose). + +### License +The datasets are freely available for research or academic work, subject to the following conditions: Any usage or distribution of the loghub datasets shall [cite the paper](https://github.com/logpai/loghub/blob/master/CITATION) or refer to the repository https://github.com/logpai/loghub. diff --git a/logs/Spark/Spark_2k.log b/data/loghub/Spark/Spark_2k.log similarity index 100% rename from logs/Spark/Spark_2k.log rename to data/loghub/Spark/Spark_2k.log diff --git a/logs/Spark/Spark_2k.log_structured.csv b/data/loghub/Spark/Spark_2k.log_structured.csv similarity index 100% rename from logs/Spark/Spark_2k.log_structured.csv rename to data/loghub/Spark/Spark_2k.log_structured.csv diff --git a/logs/Spark/Spark_2k.log_templates.csv b/data/loghub/Spark/Spark_2k.log_templates.csv similarity index 100% rename from logs/Spark/Spark_2k.log_templates.csv rename to data/loghub/Spark/Spark_2k.log_templates.csv diff --git a/logs/Thunderbird/Thunderbird_2k.log b/data/loghub/Thunderbird/Thunderbird_2k.log similarity index 100% rename from logs/Thunderbird/Thunderbird_2k.log rename to data/loghub/Thunderbird/Thunderbird_2k.log diff --git a/logs/Thunderbird/Thunderbird_2k.log_structured.csv b/data/loghub/Thunderbird/Thunderbird_2k.log_structured.csv similarity index 100% rename from logs/Thunderbird/Thunderbird_2k.log_structured.csv rename to data/loghub/Thunderbird/Thunderbird_2k.log_structured.csv diff --git a/logs/Thunderbird/Thunderbird_2k.log_templates.csv b/data/loghub/Thunderbird/Thunderbird_2k.log_templates.csv similarity index 100% rename from logs/Thunderbird/Thunderbird_2k.log_templates.csv rename to data/loghub/Thunderbird/Thunderbird_2k.log_templates.csv diff --git a/logs/Windows/Windows_2k.log b/data/loghub/Windows/Windows_2k.log similarity index 100% rename from logs/Windows/Windows_2k.log rename to 
data/loghub/Windows/Windows_2k.log diff --git a/logs/Windows/Windows_2k.log_structured.csv b/data/loghub/Windows/Windows_2k.log_structured.csv similarity index 100% rename from logs/Windows/Windows_2k.log_structured.csv rename to data/loghub/Windows/Windows_2k.log_structured.csv diff --git a/logs/Windows/Windows_2k.log_templates.csv b/data/loghub/Windows/Windows_2k.log_templates.csv similarity index 100% rename from logs/Windows/Windows_2k.log_templates.csv rename to data/loghub/Windows/Windows_2k.log_templates.csv diff --git a/logs/Zookeeper/Zookeeper_2k.log b/data/loghub/Zookeeper/Zookeeper_2k.log similarity index 100% rename from logs/Zookeeper/Zookeeper_2k.log rename to data/loghub/Zookeeper/Zookeeper_2k.log diff --git a/logs/Zookeeper/Zookeeper_2k.log_structured.csv b/data/loghub/Zookeeper/Zookeeper_2k.log_structured.csv similarity index 100% rename from logs/Zookeeper/Zookeeper_2k.log_structured.csv rename to data/loghub/Zookeeper/Zookeeper_2k.log_structured.csv diff --git a/logs/Zookeeper/Zookeeper_2k.log_templates.csv b/data/loghub/Zookeeper/Zookeeper_2k.log_templates.csv similarity index 100% rename from logs/Zookeeper/Zookeeper_2k.log_templates.csv rename to data/loghub/Zookeeper/Zookeeper_2k.log_templates.csv diff --git a/test/test_py2.sh b/tests/test_py2.sh similarity index 100% rename from test/test_py2.sh rename to tests/test_py2.sh diff --git a/test/test_py3.sh b/tests/test_py3.sh similarity index 100% rename from test/test_py3.sh rename to tests/test_py3.sh