diff --git a/CITATION b/CITATION new file mode 100644 index 00000000..506b041e --- /dev/null +++ b/CITATION @@ -0,0 +1,14 @@ +@inproceedings{logparser, + author = {Jieming Zhu and + Shilin He and + Jinyang Liu and + Pinjia He and + Qi Xie and + Zibin Zheng and + Michael R. Lyu}, + title = {Tools and benchmarks for automated log parsing}, + booktitle = {Proceedings of the 41st International Conference on Software Engineering: + Software Engineering in Practice (ICSE)}, + pages = {121--130}, + publisher = {{IEEE} / {ACM}}, + year = {2019}} \ No newline at end of file diff --git a/LICENSE.md b/LICENSE similarity index 96% rename from LICENSE.md rename to LICENSE index a4a9242b..853c895d 100644 --- a/LICENSE.md +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018 LogPAI +Copyright (c) 2018-2023 LOGPAI Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -19,3 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/logs/Andriod/Andriod_2k.log b/data/loghub/Andriod/Andriod_2k.log similarity index 100% rename from logs/Andriod/Andriod_2k.log rename to data/loghub/Andriod/Andriod_2k.log diff --git a/logs/Andriod/Andriod_2k.log_structured.csv b/data/loghub/Andriod/Andriod_2k.log_structured.csv similarity index 100% rename from logs/Andriod/Andriod_2k.log_structured.csv rename to data/loghub/Andriod/Andriod_2k.log_structured.csv diff --git a/logs/Andriod/Andriod_2k.log_templates.csv b/data/loghub/Andriod/Andriod_2k.log_templates.csv similarity index 100% rename from logs/Andriod/Andriod_2k.log_templates.csv rename to data/loghub/Andriod/Andriod_2k.log_templates.csv diff --git a/logs/Apache/Apache_2k.log b/data/loghub/Apache/Apache_2k.log similarity index 100% rename from logs/Apache/Apache_2k.log rename to data/loghub/Apache/Apache_2k.log diff --git a/logs/Apache/Apache_2k.log_structured.csv b/data/loghub/Apache/Apache_2k.log_structured.csv similarity index 100% rename from logs/Apache/Apache_2k.log_structured.csv rename to data/loghub/Apache/Apache_2k.log_structured.csv diff --git a/logs/Apache/Apache_2k.log_templates.csv b/data/loghub/Apache/Apache_2k.log_templates.csv similarity index 100% rename from logs/Apache/Apache_2k.log_templates.csv rename to data/loghub/Apache/Apache_2k.log_templates.csv diff --git a/logs/BGL/BGL_2k.log b/data/loghub/BGL/BGL_2k.log similarity index 100% rename from logs/BGL/BGL_2k.log rename to data/loghub/BGL/BGL_2k.log diff --git a/logs/BGL/BGL_2k.log_structured.csv b/data/loghub/BGL/BGL_2k.log_structured.csv similarity index 100% rename from logs/BGL/BGL_2k.log_structured.csv rename to data/loghub/BGL/BGL_2k.log_structured.csv diff --git a/logs/BGL/BGL_2k.log_templates.csv b/data/loghub/BGL/BGL_2k.log_templates.csv similarity index 100% rename from logs/BGL/BGL_2k.log_templates.csv rename to data/loghub/BGL/BGL_2k.log_templates.csv diff --git a/logs/BGL/BGL_templates.csv b/data/loghub/BGL/BGL_templates.csv similarity index 100% rename from logs/BGL/BGL_templates.csv rename to data/loghub/BGL/BGL_templates.csv diff --git a/logs/HDFS/HDFS_2k.log b/data/loghub/HDFS/HDFS_2k.log similarity index 100% rename from logs/HDFS/HDFS_2k.log rename to data/loghub/HDFS/HDFS_2k.log diff --git a/logs/HDFS/HDFS_2k.log_structured.csv b/data/loghub/HDFS/HDFS_2k.log_structured.csv similarity index 100% rename from logs/HDFS/HDFS_2k.log_structured.csv rename to data/loghub/HDFS/HDFS_2k.log_structured.csv diff --git a/logs/HDFS/HDFS_2k.log_templates.csv b/data/loghub/HDFS/HDFS_2k.log_templates.csv similarity index 100% rename from logs/HDFS/HDFS_2k.log_templates.csv rename to data/loghub/HDFS/HDFS_2k.log_templates.csv diff --git a/logs/HDFS/HDFS_templates.csv b/data/loghub/HDFS/HDFS_templates.csv similarity index 100% rename from logs/HDFS/HDFS_templates.csv rename to data/loghub/HDFS/HDFS_templates.csv diff --git a/logs/HPC/HPC_2k.log b/data/loghub/HPC/HPC_2k.log similarity index 100% rename from logs/HPC/HPC_2k.log rename to data/loghub/HPC/HPC_2k.log diff --git a/logs/HPC/HPC_2k.log_structured.csv b/data/loghub/HPC/HPC_2k.log_structured.csv similarity index 100% rename from logs/HPC/HPC_2k.log_structured.csv rename to data/loghub/HPC/HPC_2k.log_structured.csv diff --git a/logs/HPC/HPC_2k.log_templates.csv b/data/loghub/HPC/HPC_2k.log_templates.csv similarity index 100% rename from logs/HPC/HPC_2k.log_templates.csv rename to data/loghub/HPC/HPC_2k.log_templates.csv diff --git a/logs/Hadoop/Hadoop_2k.log b/data/loghub/Hadoop/Hadoop_2k.log similarity index 100% rename from logs/Hadoop/Hadoop_2k.log rename to data/loghub/Hadoop/Hadoop_2k.log diff --git a/logs/Hadoop/Hadoop_2k.log_structured.csv b/data/loghub/Hadoop/Hadoop_2k.log_structured.csv similarity index 100% rename from logs/Hadoop/Hadoop_2k.log_structured.csv rename to data/loghub/Hadoop/Hadoop_2k.log_structured.csv diff --git a/logs/Hadoop/Hadoop_2k.log_templates.csv b/data/loghub/Hadoop/Hadoop_2k.log_templates.csv similarity index 100% rename from logs/Hadoop/Hadoop_2k.log_templates.csv rename to data/loghub/Hadoop/Hadoop_2k.log_templates.csv diff --git a/logs/HealthApp/HealthApp_2k.log b/data/loghub/HealthApp/HealthApp_2k.log similarity index 100% rename from logs/HealthApp/HealthApp_2k.log rename to data/loghub/HealthApp/HealthApp_2k.log diff --git a/logs/HealthApp/HealthApp_2k.log_structured.csv b/data/loghub/HealthApp/HealthApp_2k.log_structured.csv similarity index 100% rename from logs/HealthApp/HealthApp_2k.log_structured.csv rename to data/loghub/HealthApp/HealthApp_2k.log_structured.csv diff --git a/logs/HealthApp/HealthApp_2k.log_templates.csv b/data/loghub/HealthApp/HealthApp_2k.log_templates.csv similarity index 100% rename from logs/HealthApp/HealthApp_2k.log_templates.csv rename to data/loghub/HealthApp/HealthApp_2k.log_templates.csv diff --git a/data/loghub/LICENSE b/data/loghub/LICENSE new file mode 100644 index 00000000..27b09ff7 --- /dev/null +++ b/data/loghub/LICENSE @@ -0,0 +1 @@ +The datasets are freely available for research or academic work, subject to the following conditions: Any usage or distribution of the loghub datasets shall [cite the paper](https://github.com/logpai/loghub/blob/master/CITATION) or refer to the repository https://github.com/logpai/loghub. \ No newline at end of file diff --git a/logs/Linux/Linux_2k.log b/data/loghub/Linux/Linux_2k.log similarity index 100% rename from logs/Linux/Linux_2k.log rename to data/loghub/Linux/Linux_2k.log diff --git a/logs/Linux/Linux_2k.log_structured.csv b/data/loghub/Linux/Linux_2k.log_structured.csv similarity index 100% rename from logs/Linux/Linux_2k.log_structured.csv rename to data/loghub/Linux/Linux_2k.log_structured.csv diff --git a/logs/Linux/Linux_2k.log_templates.csv b/data/loghub/Linux/Linux_2k.log_templates.csv similarity index 100% rename from logs/Linux/Linux_2k.log_templates.csv rename to data/loghub/Linux/Linux_2k.log_templates.csv diff --git a/logs/Mac/Mac.log b/data/loghub/Mac/Mac.log similarity index 100% rename from logs/Mac/Mac.log rename to data/loghub/Mac/Mac.log diff --git a/logs/Mac/Mac_2k.log b/data/loghub/Mac/Mac_2k.log similarity index 100% rename from logs/Mac/Mac_2k.log rename to data/loghub/Mac/Mac_2k.log diff --git a/logs/Mac/Mac_2k.log_structured.csv b/data/loghub/Mac/Mac_2k.log_structured.csv similarity index 100% rename from logs/Mac/Mac_2k.log_structured.csv rename to data/loghub/Mac/Mac_2k.log_structured.csv diff --git a/logs/Mac/Mac_2k.log_templates.csv b/data/loghub/Mac/Mac_2k.log_templates.csv similarity index 100% rename from logs/Mac/Mac_2k.log_templates.csv rename to data/loghub/Mac/Mac_2k.log_templates.csv diff --git a/logs/OpenSSH/OpenSSH_2k.log b/data/loghub/OpenSSH/OpenSSH_2k.log similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log rename to data/loghub/OpenSSH/OpenSSH_2k.log diff --git a/logs/OpenSSH/OpenSSH_2k.log_structured.csv b/data/loghub/OpenSSH/OpenSSH_2k.log_structured.csv similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log_structured.csv rename to data/loghub/OpenSSH/OpenSSH_2k.log_structured.csv diff --git a/logs/OpenSSH/OpenSSH_2k.log_templates.csv b/data/loghub/OpenSSH/OpenSSH_2k.log_templates.csv similarity index 100% rename from logs/OpenSSH/OpenSSH_2k.log_templates.csv rename to data/loghub/OpenSSH/OpenSSH_2k.log_templates.csv diff --git a/logs/OpenStack/OpenStack_2k.log b/data/loghub/OpenStack/OpenStack_2k.log similarity index 100% rename from logs/OpenStack/OpenStack_2k.log rename to data/loghub/OpenStack/OpenStack_2k.log diff --git a/logs/OpenStack/OpenStack_2k.log_structured.csv b/data/loghub/OpenStack/OpenStack_2k.log_structured.csv similarity index 100% rename from logs/OpenStack/OpenStack_2k.log_structured.csv rename to data/loghub/OpenStack/OpenStack_2k.log_structured.csv diff --git a/logs/OpenStack/OpenStack_2k.log_templates.csv b/data/loghub/OpenStack/OpenStack_2k.log_templates.csv similarity index 100% rename from logs/OpenStack/OpenStack_2k.log_templates.csv rename to data/loghub/OpenStack/OpenStack_2k.log_templates.csv diff --git a/logs/Proxifier/Proxifier_2k.log b/data/loghub/Proxifier/Proxifier_2k.log similarity index 100% rename from logs/Proxifier/Proxifier_2k.log rename to data/loghub/Proxifier/Proxifier_2k.log diff --git a/logs/Proxifier/Proxifier_2k.log_structured.csv b/data/loghub/Proxifier/Proxifier_2k.log_structured.csv similarity index 100% rename from logs/Proxifier/Proxifier_2k.log_structured.csv rename to data/loghub/Proxifier/Proxifier_2k.log_structured.csv diff --git a/logs/Proxifier/Proxifier_2k.log_templates.csv b/data/loghub/Proxifier/Proxifier_2k.log_templates.csv similarity index 100% rename from logs/Proxifier/Proxifier_2k.log_templates.csv rename to data/loghub/Proxifier/Proxifier_2k.log_templates.csv diff --git a/data/loghub/README.md b/data/loghub/README.md new file mode 100644 index 00000000..1ddb32b7 --- /dev/null +++ b/data/loghub/README.md @@ -0,0 +1,73 @@ +
+ +# Loghub +Loghub maintains a collection of system logs, which are freely accessible for research purposes. Some of the logs are production data released from previous studies, while some others are collected from real systems in our lab environment. Wherever possible, the logs are NOT sanitized, anonymized or modified in any way. These log datasets are freely available for research or academic work. + +**Logs currently available**: + +| Dataset | Description | Labeled | Time Span | #Lines | Data Size | +| :---------------------------- | :--------| :--------: | --------: | ---------: | ------: | +|